PH Covid-19 Data Analysis

By: Marlon Teodosio

Last update: May 28, 2020 18:19:18
In [4]:
from IPython.display import HTML

HTML('''<script>
  function code_toggle() {
    if (code_shown){
      $('div.input').hide('500');
      $('#toggleButton').val('Show Code')
    } else {
      $('div.input').show('500');
      $('#toggleButton').val('Hide Code')
    }
    code_shown = !code_shown
  }

  $( document ).ready(function(){
    code_shown=false;
    $('div.input').hide()
  });
</script>
<form action="javascript:code_toggle()"><input type="submit" id="toggleButton" value="Show Code"></form>''')
Out[4]:

Data Source

The data is extracted from the official DOH COVID-19 DataDrop.

In [5]:
#########################################################################
########################## 05 Case Information ##########################
#########################################################################

data_src = 'https://drive.google.com/open?id=1fed36fegqVEQM-h3WhskR0oCELI2zQ73' # Get shareable link from DOH Data Drop
file_id = data_src.split('=')[1]
dwn_url='https://drive.google.com/uc?export=download&id=' + file_id
url = requests.get(dwn_url).text
csv_raw = StringIO(url)
covid_ph = pd.read_csv(csv_raw, encoding = 'utf-8')

for col in covid_ph.columns:
    if 'Date' in col:
        covid_ph[col] = pd.to_datetime(covid_ph[col], dayfirst=True)

covid_ph['CityMunRes'] = covid_ph['CityMunRes'].str.replace(u\x91', 'Ñ')
covid_ph['CityMunRes'] = covid_ph['CityMunRes'].str.replace(u\x83±', 'Ñ')
covid_ph['RemovalType'] = covid_ph['RemovalType'].fillna('Active')

############################## CHECKER #################################
# covid_ph[covid_ph.RegionRes == 'NCR'].groupby('CityMunRes')['CaseCode'].count().sort_values(ascending=False).index.tolist()
Case Information Data as of: May 26
Facility Testing Data as of: May 25
Hospital, Bed & MechVent Data as of: May 25

Data Analysis

PH Latest Statistics

In [12]:
total_cases = len(covid_ph)
recovered = len(covid_ph.loc[covid_ph.RemovalType=='Recovered'])
died = len(covid_ph.loc[covid_ph.RemovalType=='Died'])
active_cases = total_cases - recovered - died
percent_active = active_cases / total_cases
percent_recovered = recovered / total_cases
percent_died = died / total_cases
asof = pd.to_datetime(covid_ph.DateRepConf.unique().max()) # latest available data
new = len(covid_ph[covid_ph.DateRepConf == asof])
In [14]:
data = pd.DataFrame(data=[[active_cases, percent_active, f"{100*percent_active:.1f}%"],
                          [died, percent_died, f"{100*percent_died:.1f}%"],
                          [recovered, percent_recovered, f"{100*percent_recovered:.1f}%"]],
                    index=['Active', 'Died', 'Recovered'], 
                    columns=['count', 'percent', 'pct']).reset_index()
data['cum_percent'] = data.percent.cumsum()
data['new'] = pd.Series([cases_daily[-1]-deaths_daily[-1]-recoveries_daily[-1], deaths_daily[-1], recoveries_daily[-1]])

# Display the Dashboard
print('\033[1m' + '\033[94m' + "PH COVID-19 Tracker" + '\033[0m')
print("As of", asof.date().strftime("%b %d, %Y"))
fig = px.bar(data, x='percent', orientation='h', color='index', 
             text='count', hover_name='pct', height=250, 
             labels={'percent': " ", 'index':'PatientType'})
fig.update_layout(legend=dict(orientation='h', x=-0.01, y=-0.1, title="", font_size=14), 
                  title=("Total Confirmed Cases: " + f"<b>{total_cases}</b>"), 
                  hovermode=False)
fig.for_each_trace(lambda t: t.update(textfont_color='white', textfont_size=16))
for i in range(0, len(data.percent.unique())):
    fig.add_annotation(dict(xref="x", yref="y", showarrow=False,
                            arrowhead=0, xanchor='right', yanchor='middle',
                            ax=0, ay=0,
                            x=data.cum_percent.unique()[i]-0.001, y=0.6,
                            text=data.pct[i]), font_size=12,
                       font_color = px.colors.qualitative.Plotly[i])
    fig.add_annotation(dict(xref="x", yref="y", showarrow=False,
                            arrowhead=0, xanchor='right', yanchor='middle',
                            ax=0, ay=0,
                            x=data.cum_percent.unique()[i]-0.001, y=-0.7,
                            text="(+"+str(data.new[i])+")"), font_size=14,
                       font_color = px.colors.qualitative.Plotly[i])
    fig.add_annotation(dict(xref="x", yref="y", showarrow=False,
                            arrowhead=0, xanchor='right', yanchor='middle',
                            ax=0, ay=0,
                            x=0.18, y=-0.7,
                            text='New Cases: (+'+str(cases_daily[-1])+')', font_size=14))
    
fig.update_xaxes(showticklabels=False, showgrid=False, zeroline=False, showline=False)
fig.update_yaxes(showticklabels=False, showgrid=False, zeroline=False, showline=False)
fig.show()
PH COVID-19 Tracker
As of May 26, 2020
In [130]:
covid_ph1 = covid_ph.copy()
covid_ph1['RemovalType'] = covid_ph1.RemovalType.fillna('Active')
data = covid_ph1[covid_ph1.RemovalType == 'Active'].groupby('HealthStatus').CaseCode.count().reset_index().rename({'CaseCode': 'Count'}, axis=1)
data['Percent'] = data['Count']/data['Count'].sum()*100
data['Pct'] = round(data['Percent'], 1).astype(str)+'%'
data['Cum_percent'] = data.Percent.cumsum()

# a = pd.Series(['Critical', 'Severe', 'Mild', 'Asymptomatic']).reset_index().rename({0: 'HealthStatus'}, axis=1)
a = pd.Series(['Asymptomatic', 'Mild', 'Severe', 'Critical']).reset_index().rename({0: 'HealthStatus'}, axis=1)
data1 = pd.merge(data, a, how='left', on='HealthStatus') 
data1 = data1.sort_values(by='index', ascending=False)
data1['Count_percent'] = data['Count'].astype(str) + " (" + data1['Pct'] + ")"

fig = px.bar(data1, x='Count', y='HealthStatus', orientation='h',  
             text='Count', hover_name='Pct', height=400)
fig.update_layout(title=("Health Status of Active Cases"), showlegend=False,
                  hoverlabel=dict(font_size=17, bgcolor='blue'), hovermode='y', margin=dict(l=200))
fig.update_traces(textposition='outside', textfont_size=15)
fig.update_xaxes(showticklabels=False, showgrid=False, zeroline=False, showline=False, title=None,
                 range=[0, data1.Count.sum()])
fig.update_yaxes(title=None, tickfont_size=15)
fig.show()
In [131]:
topcity = covid_ph[covid_ph.RegionRes == 'NCR'].groupby('CityMunRes')['CaseCode'].count().sort_values(ascending=False).index.tolist()
# topcity.remove('For Validation')
# topcity.append('For Validation')
topcity

data01 = pd.DataFrame()
for i in list(reversed(topcity)):
    df = covid_ph[covid_ph.CityMunRes == i].groupby('RemovalType').count()['CaseCode'].reset_index()
    df = df.sort_values('RemovalType')
    df['City'] = i
    df = df.rename({'CaseCode': 'Count0', 'RemovalType': 'Type'}, axis=1)
    df['Percent'] = df['Count0'] / df['Count0'].sum()*100
    df['Pct'] = round(df['Percent'], 1).astype(str)+'%'
    data01 = data01.append(df)
    
data01['Count'] = data01.Count0.astype(int).apply(lambda x : "{:,}".format(x))
cases_ncr = covid_ph[covid_ph.RegionRes == 'NCR'].shape[0]

fig = px.bar(data01, x='Percent', orientation='h', color='Type', 
             title="Total Confirmed Cases in <b>NCR</b>: " + f"<b>{cases_ncr}</b>" + " (" + str(round(cases_ncr/total_cases*100,1)) + "% of Total)",
             text='Count', hover_name='Count', height=800)
fig.update_layout(legend=dict(orientation='h', x=0.52, y=1, title="", font_size=12), hoverlabel=dict(font_size=20))
fig.for_each_trace(lambda t: t.update(textfont_color='white', textfont_size=8))

for i in range(0, len(data01.City.unique())):
    # City
    fig.add_annotation(dict(xref="x", yref="y", showarrow=False,
                            arrowhead=0, xanchor='right', yanchor='middle',
                            ax=0, ay=0,
                            x=0, y=i,
                            text=data01.City.unique()[i] + " "*15), font_size=15)
    
    # Death Rate
    fig.add_annotation(dict(xref="x", yref="y", showarrow=False,
                            arrowhead=0, xanchor='left', yanchor='middle',
                            ax=0, ay=0,
                            x=104, y=i,
                            text=data01[(data01.Type == 'Died') & (data01.City == list(reversed(topcity))[i])]['Pct'].values[0]), 
                       font_size=14, font_color = px.colors.qualitative.Plotly[1])

    # Recovery Rate
    fig.add_annotation(dict(xref="x", yref="y", showarrow=False,
                            arrowhead=0, xanchor='left', yanchor='middle',
                            ax=0, ay=0,
                            x=119, y=i,
                            text=data01[(data01.Type == 'Recovered') & (data01.City == list(reversed(topcity))[i])]['Pct'].values[0]), 
                       font_size=14, font_color = px.colors.qualitative.Plotly[2])

    # Total Confirmed Cases
    fig.add_annotation(dict(xref="x", yref="y", showarrow=False,
                            arrowhead=0, xanchor='right', yanchor='middle',
                            ax=0, ay=0,
                            x=0, y=i,
                            text="{:,}".format(data01[data01.City == list(reversed(topcity))[i]].Count0.sum())+" "*3), 
                       font_size=14)

# Death Rate
fig.add_annotation(dict(xref="x", yref="y", showarrow=False,
                        arrowhead=0, xanchor='left', yanchor='middle',
                        ax=0, ay=0,
                        x=103, y=17,
                        text="Fatality<br>Rate"), 
                   font_size=11, font_color = px.colors.qualitative.Plotly[1])

# Recovery Rate
fig.add_annotation(dict(xref="x", yref="y", showarrow=False,
                        arrowhead=0, xanchor='left', yanchor='middle',
                        ax=0, ay=0,
                        x=118, y=17,
                        text="Recovery<br>Rate"), 
                   font_size=11, font_color = px.colors.qualitative.Plotly[2])

# Total Confirmed Rate
fig.add_annotation(dict(xref="x", yref="y", showarrow=False,
                        arrowhead=0, xanchor='right', yanchor='middle',
                        ax=0, ay=0,
                        x=-1, y=17,
                        text="Confirmed<br>Cases"), 
                   font_size=11)

fig.update_xaxes(showticklabels=False, showgrid=False, zeroline=False, showline=False, side='top', tickfont_size=8, title=None)
fig.update_yaxes(showticklabels=False, showgrid=False, zeroline=False, showline=False)
fig.show()

Daily Cases

In [18]:
base = pd.DataFrame(covid_ph.DateRepConf.sort_values().unique()).rename({0: 'Date'}, axis=1)

c = cases_daily.reset_index().rename({'DateRepConf': 'Date', 'CaseCode': 'Count'}, axis=1)
c = pd.merge(base, c, how='left', on='Date')
c['Type'] = 'Reported Confirmed Cases'
c.Count.fillna(0)
c['Cum_count'] = c.Count.cumsum()
c['Percent_change'] = c.Count.pct_change()*100
c['%Change'] = c.Cum_count.pct_change()*100

d = deaths_daily.reset_index().rename({'DateRepRem': 'Date', 'CaseCode': 'Count'}, axis=1)
d = pd.merge(base, d, how='left', on='Date')
d['Type'] = 'Reported Deaths'
#d = deaths_daily.reset_index().rename({'DateDied': 'Date', 'CaseCode': 'Count'}, axis=1)
#d['Type'] = 'Number of Actual Deaths'
d.Count = d.Count.fillna(0)
d['Cum_count'] = d.Count.cumsum()
d['Percent_change'] = d.Count.pct_change()*100
d['%Change'] = d.Cum_count.pct_change()*100

r = recoveries_daily.reset_index().rename({'DateRepRem': 'Date', 'CaseCode': 'Count'}, axis=1) 
r = pd.merge(base, r, how='left', on='Date')
r['Type'] = 'Reported Recoveries'
#r = recoveries_daily.reset_index().rename({'DateRecover': 'Date', 'CaseCode': 'Count'}, axis=1)
#r['Type'] = 'Number of Actual Recoveries'
r.Count = r.Count.fillna(0)
r['Cum_count'] = r.Count.cumsum()
r['Percent_change'] = r.Count.pct_change()*100
r['%Change'] = r.Cum_count.pct_change()*100

import statsmodels.api as sm

cycle, trend1 = sm.tsa.filters.hpfilter(c['Count'], 7)
c['trend_HP'] = trend1
cycle, trend2 = sm.tsa.filters.hpfilter(d['Count'], 7)
d['trend_HP'] = trend2
cycle, trend3 = sm.tsa.filters.hpfilter(r['Count'], 7)
r['trend_HP'] = trend3

c['trend_7MA'] = c['Count'].rolling(7).mean()
d['trend_7MA'] = d['Count'].rolling(7).mean()
r['trend_7MA'] = r['Count'].rolling(7).mean()

c['trend_7MA'] = c['trend_7MA'].fillna(" ")

dr = pd.concat([r,d])

dr['trend_7MA_name'] = 'Trend (7-day MA)'
dr['trend_HP_name'] = 'Trend (HP Filter)'

d['trend_7MA_name'] = 'Trend (7-day MA)'
d['trend_HP_name'] = 'Trend (HP Filter)'
r['trend_7MA_name'] = 'Trend (7-day MA)'
r['trend_HP_name'] = 'Trend (HP Filter)'
c['trend_7MA_name'] = 'Trend (7-day MA)'
c['trend_HP_name'] = 'Trend (HP Filter)'
In [19]:
fig1 = px.bar(c, x='Date', y='Count', color='Type', 
              color_discrete_sequence=["lightblue"])

fig2 = px.line(c, x='Date', y='trend_7MA', color='trend_7MA_name', line_shape='spline', 
               color_discrete_sequence=['blue'])
# fig3 = px.line(c, x='Date', y='trend_HP', color='trend_HP_name', line_shape='spline', 
#                color_discrete_sequence=['lightblue'])

fig1.add_trace(fig2.data[0])
# fig1.add_trace(fig3.data[0].update(line={'dash': 'dash'}))
fig1.update_yaxes(title="Case Count", tickfont_size=10)
fig1.update_layout(title=dict(x=0.5, text="PH Covid-19 Daily Confirmed Cases"),
                   xaxis=dict(tickmode='array', dtick=0, 
                              tickfont_size=9, title="Date"),
                   legend=dict(orientation="v", title="",
                               x=0.05, y=0.75, yanchor="bottom"), 
                   hoverlabel=dict(font_size=20))

fig1.show()
In [20]:
fig1 = px.bar(d, x='Date', y='Count', color='Type', 
              color_discrete_sequence=["lightpink"])

fig2 = px.line(d, x='Date', y='trend_7MA', color='trend_7MA_name', line_shape='spline', 
               color_discrete_sequence=['tomato'])
# fig3 = px.line(c, x='Date', y='trend_HP', color='trend_HP_name', line_shape='spline', 
#                color_discrete_sequence=['pink'])

fig1.add_trace(fig2.data[0])
# fig1.add_trace(fig3.data[0].update(line={'dash': 'dash'}))
fig1.update_yaxes(title="Case Count", tickfont_size=10)
fig1.update_layout(title=dict(x=0.5, text="PH Covid-19 Daily Death Cases"),
                   xaxis=dict(tickmode='array', dtick=0, 
                              tickfont_size=9, title="Date"),
                   legend=dict(orientation="v", title="",
                               x=0.05, y=0.75, yanchor="bottom"), 
                   hoverlabel=dict(font_size=20))

fig1.show()
In [21]:
fig1 = px.bar(r, x='Date', y='Count', color='Type', 
              color_discrete_sequence=["lightgreen"])

fig2 = px.line(r, x='Date', y='trend_7MA', color='trend_7MA_name', line_shape='spline', 
               color_discrete_sequence=['green'])
# fig3 = px.line(c, x='Date', y='trend_HP', color='trend_HP_name', line_shape='spline', 
#                color_discrete_sequence=['pink'])

fig1.add_trace(fig2.data[0])
# fig1.add_trace(fig3.data[0].update(line={'dash': 'dash'}))
fig1.update_yaxes(title="Case Count", tickfont_size=10)
fig1.update_layout(title=dict(x=0.5, text="PH Covid-19 Daily Recovered Cases"),
                   xaxis=dict(tickmode='array', dtick=0, 
                              tickfont_size=9, title="Date"),
                   legend=dict(orientation="v", title="",
                               x=0.05, y=0.75, yanchor="bottom"), 
                   hoverlabel=dict(font_size=20))

fig1.show()

Cumulative Cases

Cumulative Cases In Log Scale
In [22]:
cc_ecq = cc[cc.Date >= '2020-03-15'].reset_index(drop=True).reset_index()
dr_ecq = dr[dr.Date >= '2020-03-15']

# Figure 1
fig1 = px.line(cc_ecq, x='Date', y='Cum_count', color='Type', log_y=True,
              color_discrete_sequence=['blue'])
fig1.update_yaxes(title="log(Case Count)", tickfont_size=10)
fig1.update_layout(title=dict(x=0.5, text="PH Covid-19 Cumulative Number of Cases (in Log Scale)"),
                   xaxis=dict(tickmode='array', showgrid=False, dtick=0, tickfont_size=9, title="Day"),
                   legend=dict(orientation="v", title="",
                               x=0.05, y=0.8, yanchor="bottom"), 
                   hoverlabel=dict(font_size=20), hovermode="x")
# fig1.update_layout(shapes=[dict(type= 'line', 
#                                 yref= 'y', y0=1, y1=10000, 
#                                 xref= 'x', x0=1, x1=50, 
#                                 line=dict(color="green", width=1, dash="dashdot"))])
fig1.show()

# Figure 2
fig2 = px.line(dr_ecq, x='Date', y='Cum_count', 
               color='Type', 
               line_shape='spline', log_y = True,
               color_discrete_sequence=['limegreen', 'tomato'])

fig2.update_yaxes(title="log(Case Count)", showgrid=True, tickfont_size=10)
fig2.update_layout(title_text="PH Covid-19 Cumulative Number of Cases (in Log Scale)")
fig2.update_layout(title=dict(x=0.5),
                  xaxis=dict(tickmode='array', showgrid=False, #tick0=0,
                             dtick=0, tickfont_size=9, title="Date"),
                  legend=dict(orientation="v", title="",
                              x=0.05, y=0.8, yanchor="bottom"), 
                  hoverlabel=dict(font_size=20), hovermode='x')

fig2.show()

print("Note: Day 0 is March 15 when ECQ was implemented & hit 140 confirmed cases.")
Note: Day 0 is March 15 when ECQ was implemented & hit 140 confirmed cases.
In [23]:
# # Create figure with secondary y-axis
# fig = make_subplots(specs=[[{"secondary_y": True}]])

# # Figure 1
# fig1 = px.line(dr, x='Date', y='Cum_count', color='Type', line_shape='spline',
#                color_discrete_sequence=['limegreen', 'tomato'])

# # Figure 2
# fig2 = px.bar(c, x='Date', y='Cum_count', color='Type', 
#               color_discrete_sequence=["lightblue"])

# fig.add_trace(fig2.data[0], secondary_y=False)
# fig.add_trace(fig1.data[0], secondary_y=True)
# fig.add_trace(fig1.data[1], secondary_y=True)

# fig.update_yaxes(range=[0, dr.Cum_count.max()*1.5], 
#                  title="Deaths / Recoveries", tickfont_size=10, secondary_y=True)
# fig.update_yaxes(title="Active Cases", showgrid=False, tickfont_size=10, secondary_y=False)

# fig.update_layout(title_text="PH Covid-19 Cumulative Number of Cases")
# fig.update_layout(title=dict(x=0.5),
#                   xaxis=dict(tickmode='linear', #tick0=0, 
#                              dtick=0, tickfont_size=8, title="Date"),
#                   legend=dict(orientation="v", title="",
#                               x=0.02, y=0.7, yanchor="bottom"), 
#                   hovermode='x')

# fig.show()

Percent Change on Daily Cases

In [25]:
print('-------------------------------- Confirmed Cases --------------------------------')
print('1-day Percent Growth:', str(round(c1['%Change'].tail(1).values[0],1))+'%')
print('7-day Average Percent Growth:', str(round(c1['%Change'].tail(7).mean(),1))+'%')
print('30-day Average Percent Growth:', str(round(c1['%Change'].tail(30).mean(),1))+'%')
-------------------------------- Confirmed Cases --------------------------------
1-day Percent Growth: 2.4%
7-day Average Percent Growth: 1.8%
30-day Average Percent Growth: 2.2%
In [26]:
# Figure 1
c1['trend_name'] = 'Trend (HP Filter)'
fig_trend = px.line(c1, x='Date', y='trend', color='trend_name', 
                    color_discrete_sequence=["cadetblue"])

fig2 = px.line(c1, x='Date', y='%Change', color='Type',
               color_discrete_sequence=["cadetblue"])
fig2.add_trace(fig_trend.data[0].update(line={'dash': 'dash'}))
fig2.update_yaxes(title="%Change", showgrid=False, tickfont_size=10)
fig2.update_layout(title=dict(x=0.5, text="%Change on Daily Confirmed Cases"),
                   xaxis=dict(tickmode='array', #tick0=4, dtick=0, 
                              tickfont_size=9, title="Date", showgrid=False),
                   legend=dict(orientation="v", title="",
                              x=0.75, y=0.85, yanchor="bottom"), 
                   hoverlabel=dict(font_size=20))

# fig2.add_annotation(dict(xref="x", yref="y", showarrow=True, arrowhead=1,
#                          xanchor='left', yanchor='middle', ax=0, ay=0,
#                          x=pd.to_datetime(c1.Date.unique().max()), 
#                          y=round(c1['trend'][-1:].values[0],1),
#                          text=" "*2 + round(c1['trend'][-1:].values[0],1).astype('str')+'%', 
#                          font_size=14, font_color='cadetblue'))

fig2.show()
In [27]:
print('--------------------------------Reported Deaths--------------------------------')
print('1-day Percent Growth:', str(round(d1['%Change'].tail(1).values[0],1))+'%')
print('7-day Average Percent Growth:', str(round(d1['%Change'].tail(7).mean(),1))+'%')
print('30-day Average Percent Growth:', str(round(d1['%Change'].tail(30).mean(),1))+'%')
--------------------------------Reported Deaths--------------------------------
1-day Percent Growth: 1.5%
7-day Average Percent Growth: 0.8%
30-day Average Percent Growth: 1.9%
In [28]:
# Figure 2
d1['trend_name'] = 'Trend (HP Filter)'
fig_trend = px.line(d1, x='Date', y='trend', color='trend_name', color_discrete_sequence=["tomato"])

fig2 = px.line(d1, x='Date', y='%Change', color='Type', 
               color_discrete_sequence=["tomato"])
fig2.add_trace(fig_trend.data[0].update(line={'dash': 'dash'}))
fig2.update_yaxes(title="%Change", showgrid=False, tickfont_size=10)
fig2.update_layout(title=dict(x=0.5, text="%Change on Daily Reported Deaths"),
                   xaxis=dict(tickmode='array', #tick0=4, dtick=0, 
                              tickfont_size=9, title="Date", showgrid=False),
                   legend=dict(orientation="v", title="",
                              x=0.75, y=0.85, yanchor="bottom"), 
                   hoverlabel=dict(font_size=20))

# fig2.add_annotation(dict(xref="x", yref="y", showarrow=True, arrowhead=1,
#                          xanchor='left', yanchor='middle', ax=0, ay=0,
#                          x=pd.to_datetime(d1.Date.unique().max()), 
#                          y=round(d1['trend'][-1:].values[0],1),
#                          text=" "*2 + round(d1['trend'][-1:].values[0],1).astype('str')+'%', 
#                          font_size=14, font_color='tomato'))

fig2.show()
-------------------------------- Reported Recoveries --------------------------------
1-day Percent Growth: 2.7%
7-day Average Percent Growth: 2.6%
30-day Average Percent Growth: 4.7%

Active-Death-Recovery Trend

In [31]:
active_daily = np.maximum(cases_daily - recoveries_daily  - deaths_daily, 0).fillna(0)

active_rate_daily = (active_daily.cumsum() / cases_daily.cumsum())*100
death_rate_daily = (deaths_daily.cumsum() / cases_daily.cumsum())*100
recovery_rate_daily = (recoveries_daily.cumsum() / cases_daily.cumsum())*100
In [33]:
aaa1 = active_rate_daily.reset_index().rename({'index': 'Date', 'CaseCode': 'Rate (%)'}, axis=1)
aaa1['Type'] = 'Active Rate'
ddd1 = death_rate_daily.reset_index().rename({'index': 'Date', 'CaseCode': 'Rate (%)'}, axis=1)
ddd1['Type'] = 'Death Rate'
rrr1 = recovery_rate_daily.reset_index().rename({'index': 'Date', 'CaseCode': 'Rate (%)'}, axis=1)
rrr1['Type'] = 'Recovery Rate'

adr1 = pd.concat([aaa1, ddd1, rrr1])

# March 16 - start of ECQ
aaa2 = aaa1[46:]
ddd2 = ddd1[46:]
rrr2 = rrr1[46:]
adr2 = pd.concat([aaa2, ddd2, rrr2])

fig = px.line(adr2, x='Date', y='Rate (%)', color='Type')
fig.update_xaxes(showticklabels=True, showgrid=False, zeroline=False, showline=False)
fig.update_yaxes(showticklabels=True, showgrid=True, zeroline=False, showline=False)

# fig.update_layout(title_text="PH Covid-19 Cumulative Number of Cases")
fig.update_layout(title=dict(x=0.5), showlegend=True,
                  xaxis=dict(tickmode='array', #tick0=0, 
                             dtick=0, tickfont_size=8, title="Date"),
                  yaxis=dict(tickfont_size=10),
                  legend=dict(orientation="h", title=" ",
                              x=-0.02, y=1, yanchor="bottom"), 
                  hoverlabel=dict(font_size=20))
fig.add_annotation(dict(xref="x", yref="y", showarrow=True, arrowhead=1, 
                        xanchor='left', yanchor='bottom', ax=0, ay=20,
                        x=pd.to_datetime(adr2.Date.unique().min()), y=0,
                        text='Start of ECQ'), font_size=11)
# Latest Rate
fig.add_annotation(dict(xref="x", yref="y", showarrow=True, arrowhead=1, 
                        xanchor='left', yanchor='middle', ax=0, ay=0,
                        x=pd.to_datetime(adr2.Date.unique().max()), 
                        y=aaa2['Rate (%)'][-1:].values[0],
                        text=" "*2 + round(aaa2['Rate (%)'][-1:].values[0],1).astype('str')+'%', 
                        font_size=15, font_color=px.colors.qualitative.Plotly[0]))
fig.add_annotation(dict(xref="x", yref="y", showarrow=True, arrowhead=1, 
                        xanchor='left', yanchor='middle', ax=0, ay=0,
                        x=pd.to_datetime(adr2.Date.unique().max()), 
                        y=ddd2['Rate (%)'][-1:].values[0],
                        text=" "*2 + round(ddd2['Rate (%)'][-1:].values[0],1).astype('str')+'%', 
                        font_size=15, font_color=px.colors.qualitative.Plotly[1]))
fig.add_annotation(dict(xref="x", yref="y", showarrow=True, arrowhead=1, 
                        xanchor='left', yanchor='middle', ax=0, ay=0,
                        x=pd.to_datetime(adr2.Date.unique().max()), 
                        y=rrr2['Rate (%)'][-1:].values[0],
                        text=" "*2 + round(rrr2['Rate (%)'][-1:].values[0],1).astype('str')+'%', 
                        font_size=15, font_color=px.colors.qualitative.Plotly[2]))

# Legend name
# fig.add_annotation(dict(xref="x", yref="y", showarrow=True, arrowhead=1, 
#                         xanchor='right', yanchor='middle', ax=0, ay=0,
#                         x=pd.to_datetime(adr2.Date.unique().min()), 
#                         y=a2['Rate (%)'][:1].values[0],
#                         text="Active Rate"+" ",
#                         font_size=14))
# fig.add_annotation(dict(xref="x", yref="y", showarrow=True, arrowhead=1, 
#                         xanchor='right', yanchor='middle', ax=0, ay=0,
#                         x=pd.to_datetime(adr2.Date.unique().min()), 
#                         y=d2['Rate (%)'][:1].values[0],
#                         text="Death Rate"+" ", 
#                         font_size=14))
# fig.add_annotation(dict(xref="x", yref="y", showarrow=True, arrowhead=1, 
#                         xanchor='right', yanchor='middle', ax=0, ay=0,
#                         x=pd.to_datetime(adr2.Date.unique().min()), 
#                         y=r2['Rate (%)'][:1].values[0],
#                         text="Recovery Rate"+" ", 
#                         font_size=14))

fig.show()

Patients Profile

Sex

In [34]:
# Figure 0
data = covid_ph.groupby('Sex')['CaseCode'].count().reset_index().sort_values('CaseCode', ascending=False).rename({'CaseCode':'Count'}, axis=1).reset_index(drop=True)
data['Percent'] = (data.Count / data.Count.sum())*100
data['Pct'] = round(data['Percent'],1).astype('str') + '%'

fig = px.bar(data, x='Percent', orientation='h', color='Sex', 
             text='Count', hover_name='Pct', height=250, labels={'Percent': ''})
fig.update_layout(legend=dict(orientation='h', title=""), hovermode=False, font_size=14,
                  title=("Total Confirmed Cases: " + f"<b>{total_cases}</b>"))
fig.for_each_trace(lambda t: t.update(textfont_color='white', textfont_size=20))
#for i in range(0, len(data.Percent.unique())):
for i in [0, 1]:
    fig.add_annotation(dict(xref="x", yref="y", showarrow=False,
                            arrowhead=0, xanchor='right', yanchor='middle',
                            ax=0, ay=0,
                            x=data.Percent.cumsum().unique()[i]-0.001, y=0.6,
                            text=data.Pct[i]), font_size=16,
                       font_color = px.colors.qualitative.Plotly[i])
    
fig.update_xaxes(showticklabels=False, showgrid=False, zeroline=False, showline=False)
fig.update_yaxes(showticklabels=False, showgrid=False, zeroline=False, showline=False)
fig.show()

# Figure 1 & 2
data1 = covid_ph.groupby(['Sex','DateRepConf'])['CaseCode'].count().reset_index().rename({'CaseCode':'Count'}, axis=1)

date_f = active_daily.index.to_frame().reset_index(drop='True').rename({0: 'DateRepConf'}, axis=1)
date_f['Sex'] = 'Female'
data_f = pd.merge(date_f, data1[data1.Sex == 'Female'].iloc[:,1:], how='left', on='DateRepConf').fillna(0)
data_f['Cum_count'] = data_f.Count.cumsum()
date_m = active_daily.index.to_frame().reset_index(drop='True').rename({0: 'DateRepConf'}, axis=1)
date_m['Sex'] = 'Male'
data_m = pd.merge(date_m, data1[data1.Sex == 'Male'].iloc[:,1:], how='left', on='DateRepConf').fillna(0)
data_m['Cum_count'] = data_m.Count.cumsum()

data2 = pd.concat([data_m, data_f]).reset_index(drop=True)
data2['Percent'] = data2.groupby('DateRepConf')['Cum_count'].apply(lambda x: 100 * x / float(x.sum())).values
data3 = data2[data2.DateRepConf > '2020-03-15'] # Filter start ECQ

fig1 = px.line(data3, x='DateRepConf', y='Cum_count', color='Sex')
fig2 = px.bar(data3, x='DateRepConf', y='Percent', color='Sex')

fig000 = make_subplots(rows=1, cols=2, subplot_titles=("Cumulative Count by Sex", "Cumulative Sex Ratio"))
for loop in range(0, len(data.Sex.unique())):
    fig000.add_trace(fig1.data[loop], row=1, col=1)
    fig000.add_trace(fig2.data[loop], row=1, col=2)
    fig000.update_traces(showlegend=True, row=1, col=1)
    fig000.update_traces(showlegend=False, row=1, col=2)
fig000.update_layout(barmode='stack', 
                     xaxis1=dict(tickmode='array', dtick=0, tickfont_size=8, title="Date"),
                     xaxis2=dict(tickmode='array', dtick=0, tickfont_size=8, title="Date"), 
                     yaxis1=dict(tickfont_size=8), yaxis2=dict(tickfont_size=8))
# fig000.update_xaxes(showticklabels=False, showgrid=False, 
#                     zeroline=False, showline=False)
# fig000.update_yaxes(showticklabels=False, showgrid=False, 
#                     zeroline=False, showline=False, row=1, col=2)
fig000.update_layout(title=dict(x=0.5), showlegend=False, height=400,
                     legend=dict(orientation="h", x=-0.05, y=-0.4,
                                 yanchor="bottom"), hoverlabel=dict(font_size=20))
    
fig000.show()


# Figure 3 & 4
data2 = pd.concat([data_m, data_f]).reset_index(drop=True)
data2['Percent'] = data2.groupby('DateRepConf')['Count'].apply(lambda x: 100 * x / float(x.sum())).values
data3 = data2[data2.DateRepConf > '2020-03-15'] # Filter start ECQ

fig1 = px.line(data3, x='DateRepConf', y='Count', color='Sex')
fig2 = px.bar(data3, x='DateRepConf', y='Percent', color='Sex')

fig000 = make_subplots(rows=1, cols=2, subplot_titles=("Daily Case Count by Sex", "Daily Case Count Sex Ratio"))
for loop in range(0, len(data.Sex.unique())):
    fig000.add_trace(fig1.data[loop], row=1, col=1)
    fig000.add_trace(fig2.data[loop], row=1, col=2)
    fig000.update_traces(showlegend=True, row=1, col=1)
    fig000.update_traces(showlegend=False, row=1, col=2)
fig000.update_layout(barmode='stack', 
                     xaxis1=dict(tickmode='array', dtick=0, tickfont_size=8, title="Date"),
                     xaxis2=dict(tickmode='array', dtick=0, tickfont_size=8, title="Date"), 
                     yaxis1=dict(tickfont_size=8), yaxis2=dict(tickfont_size=8))
# fig000.update_xaxes(showticklabels=False, showgrid=False, 
#                     zeroline=False, showline=False)
# fig000.update_yaxes(showticklabels=False, showgrid=False, 
#                     zeroline=False, showline=False, row=1, col=2)
fig000.update_layout(title=dict(x=0.5), showlegend=False, height=400,
                     legend=dict(orientation="h", x=-0.05, y=-0.4,
                                 yanchor="bottom"), hoverlabel=dict(font_size=20))
    
fig000.show()

print("Note: There are", covid_ph.Sex.isnull().sum(), "cases with no sex information")
Note: There are 0 cases with no sex information
In [35]:
# Figure 01
data01 = covid_ph.groupby(['Sex','RemovalType'])['CaseCode'].count().reset_index().sort_values('CaseCode', ascending=False).rename({'CaseCode':'Count'}, axis=1)
data01['Percent'] = data01.groupby('RemovalType')['Count'].apply(lambda x: 100 * x / float(x.sum())).values
data01['Pct'] = round(data01['Percent'],1).astype('str') + '%'

fig = px.bar(data01, x='Percent', orientation='h', color='Sex', 
             text='Count', hover_name='Pct', hover_data=None, height=350)
fig.update_layout(legend=dict(orientation='h', x=0.09, y=0, title=""), hoverlabel=dict(font_size=20))
fig.for_each_trace(lambda t: t.update(textfont_color='white', textfont_size=18))
for i in range(0, len(data01.RemovalType.unique())):
    fig.add_annotation(dict(xref="x", yref="y", showarrow=False,
                            arrowhead=0, xanchor='right', yanchor='middle',
                            ax=0, ay=0,
                            x=0, y=i,
                            text=data01.RemovalType.unique()[i] + " "), font_size=15)
    
fig.update_xaxes(showticklabels=True, showgrid=False, zeroline=False, showline=False, side='top', tickfont_size=8, title=None)
fig.update_yaxes(showticklabels=False, showgrid=False, zeroline=False, showline=False)
fig.show()

Age

*Note: There are 74 ( 0.5% ) cases with no age information
In [38]:
df_recovered = covid_ph.loc[(covid_ph.RemovalType=='Recovered')].reset_index(drop=True)
df_died = covid_ph.loc[(covid_ph.RemovalType=='Died')].reset_index(drop=True)

df = pd.concat([df_recovered, df_died])
fig = px.histogram(df, x='Age', color='RemovalType', opacity=0.8, nbins=20,
                   marginal='box', barmode='overlay', 
                   labels={"RemovalType": "PatientType"}, 
                   title='Age Distribution of Recovered vs Died Patients')
fig.update_layout(legend=dict(orientation="h", title="", 
                              x=0.03, y=0.65, yanchor="bottom"),
                  xaxis=dict(tickmode='linear', dtick=10))

fig.add_annotation(dict(xref="x", yref="paper", showarrow=False,
                        arrowhead=0, xanchor='right', yanchor='middle',
                        ax=0, ay=0,
                        x=df_recovered.Age.median()+1, y=0.86,
                        text=int(df_recovered.Age.median()), font_size=11,
                        font_color = px.colors.qualitative.Plotly[0]))
fig.add_annotation(dict(xref="x", yref="paper", showarrow=False,
                        arrowhead=0, xanchor='right', yanchor='middle',
                        ax=0, ay=0,
                        x=df_died.Age.median()+1, y=0.99,
                        text=int(df_died.Age.median()), font_size=11,
                        font_color = px.colors.qualitative.Plotly[1]))

fig.show()
In [39]:
df_recovered = covid_ph.loc[(covid_ph.RemovalType=='Recovered')].reset_index(drop=True)
df_died = covid_ph.loc[(covid_ph.RemovalType=='Died')].reset_index(drop=True)

df = pd.concat([df_recovered, df_died])

# Recovered Patients
fig1 = px.histogram(df_recovered, x='Age', color='Sex', opacity=0.8, 
                   marginal='box', barmode='overlay', 
                   labels={"RemovalType": "PatientType"}, 
                   title='Age Distribution of Recovered Patients')
fig1.update_layout(legend=dict(orientation="h", title="", x=0.03, y=0.65, yanchor="bottom"),                   
                  xaxis=dict(tickmode='linear', dtick=10))

fig1.add_annotation(dict(xref="x", yref="paper", showarrow=False,
                        arrowhead=0, xanchor='right', yanchor='middle',
                        ax=0, ay=0,
                        x=df_recovered[df_recovered.Sex == 'Male'].Age.median()+1, y=0.86,
                        text=int(df_recovered[df_recovered.Sex == 'Male'].Age.median()), font_size=11,
                        font_color = px.colors.qualitative.Plotly[0]))
fig1.add_annotation(dict(xref="x", yref="paper", showarrow=False,
                        arrowhead=0, xanchor='right', yanchor='middle',
                        ax=0, ay=0,
                        x=df_recovered[df_recovered.Sex == 'Female'].Age.median()+1, y=0.99,
                        text=int(df_recovered[df_recovered.Sex == 'Female'].Age.median()), font_size=11,
                        font_color = px.colors.qualitative.Plotly[1]))
fig1.show()

fig2 = px.histogram(df_died, x='Age', color='Sex', opacity=0.8, 
                   marginal='box', barmode='overlay', 
                   labels={"RemovalType": "PatientType"}, 
                   title='Age Distribution of Died Patients')
fig2.update_layout(legend=dict(orientation="h", title="", x=0.03, y=0.65, yanchor="bottom"),
                   xaxis=dict(tickmode='linear', dtick=10))

fig2.add_annotation(dict(xref="x", yref="paper", showarrow=False,
                        arrowhead=0, xanchor='right', yanchor='middle',
                        ax=0, ay=0,
                        x=df_died[df_died.Sex == 'Male'].Age.median()+1, y=0.86,
                        text=int(df_died[df_died.Sex == 'Male'].Age.median()), font_size=11,
                        font_color = px.colors.qualitative.Plotly[0]))
fig2.add_annotation(dict(xref="x", yref="paper", showarrow=False,
                        arrowhead=0, xanchor='right', yanchor='middle',
                        ax=0, ay=0,
                        x=df_died[df_died.Sex == 'Female'].Age.median()+1, y=0.99,
                        text=int(df_died[df_died.Sex == 'Female'].Age.median()), font_size=11,
                        font_color = px.colors.qualitative.Plotly[1]))
fig2.show()

Residence

In [40]:
df_regres = covid_ph.groupby('ProvRes').count()['CaseCode'].sort_values(ascending=False).reset_index()
df_regres['PH'] = 'Philippines'
df_regres.rename({'CaseCode': 'Count'}, inplace=True, axis=1)
df_regres['Percentage'] = round((df_regres.Count / df_regres.Count.sum())*100,1).astype('str')+'%' 
df_regres = df_regres[df_regres.ProvRes != 'For Validation']

df_regres1 = covid_ph.groupby(['ProvRes', 'CityMunRes']).count()['CaseCode'].sort_values(ascending=False).reset_index()
df_regres1['PH'] = 'Philippines'
df_regres1.rename({'CaseCode': 'Count'}, inplace=True, axis=1)
df_regres1['Percentage'] = round((df_regres1.Count / df_regres1.Count.sum())*100,1).astype('str')+'%' 
df_regres1 = df_regres1[df_regres1.ProvRes != 'For Validation']
df_regres1 = df_regres1[df_regres1.CityMunRes != 'For Validation']
In [41]:
top = 10
fig1 = px.bar(df_regres.head(top), x="Count", y="ProvRes",
             orientation='h', hover_name='Percentage',
             text="Count",
             title="Provinces with Most Cases")
fig1.update_traces(textposition='outside', textfont_size=12)
fig1.update_layout(title=dict(x=0.5), yaxis=dict(categoryorder='total ascending'), 
                   showlegend = False, legend_orientation="v")
fig2 = px.bar(df_regres1.head(top), x="Count", y="CityMunRes",
             orientation='h', hover_name='Percentage',
             text="Count",
             title="City/Municipalities with Most Cases")
fig2.update_traces(textposition='outside', textfont_size=12)
fig2.update_layout(title=dict(x=0.5), yaxis=dict(categoryorder='total ascending'),
                   showlegend = False, legend_orientation="v")
#fig1.show()
#fig2.show()

fig000 = make_subplots(rows=1, cols=2, subplot_titles=("Provinces with Most Cases", "Cities with Most Cases"))
fig000.add_trace(fig1.data[0], row=1, col=1)
fig000.add_trace(fig2.data[0], row=1, col=2)
fig000.update_layout(yaxis1=dict(categoryorder='total ascending'), yaxis2=dict(categoryorder='total ascending'), height=500)
fig000.update_xaxes(range=(0, round(max(df_regres.Count) + max(df_regres.Count)/4)), row=1, col=1)
fig000.update_xaxes(range=(0, round(max(df_regres1.Count) + max(df_regres1.Count)/4)), row=1, col=2)
fig000.update_layout(xaxis={"domain": [0.05, 0.40]}, xaxis2={"domain": [0.6, 1]},
                     hoverlabel=dict(font_size=15))
fig000.show()
Note:
1. There are 111 cases no information about residence.
2. There are 0 (0.0%) cases with region but no specific province/city of residence (tagged as "For Validation").
In [45]:
x = []
for i in range(0, len(geojson['features'])):
    z = list(geojson['features'][i].values())[1]['PROVINCE']
    x.append(z)
p = pd.DataFrame(x).rename({0: 'Province'}, axis=1)

df_regres0 = df_regres.copy()
df_regres0['ProvRes'] = df_regres0['ProvRes'].str.title()
df_regres0['ProvRes'] = df_regres0['ProvRes'].replace({'Metro Manila': 'Metropolitan Manila',
                                                       'Davao Del Sur': 'Davao del Sur',
                                                       'Zamboanga Del Sur': 'Zamboanga del Sur',
                                                       'Davao Del Norte': 'Davao del Norte',
                                                       'Lanao Del Sur': 'Lanao del Sur',
                                                       'Lanao Del Norte': 'Lanao del Norte',
                                                       'Agusan Del Norte': 'Agusan del Norte',
                                                       'Cotabato': 'North Cotabato', 
                                                       'Cebu Province': 'Cebu', 
                                                       'Iloilo Province': 'Iloilo', 
                                                       'Tarlac Province': 'Tarlac'})

df_regres0 = df_regres0.reset_index(drop=True)

# CHECKER
prov = pd.merge(p, df_regres0, how='outer', left_on='Province', right_on='ProvRes')
prov['Count_log'] = np.log10(prov['Count'])
prov['Count_log'] = prov['Count_log'].fillna(0)
prov['Count'] = prov['Count'].fillna(0)
# prov.tail(10)

Geo Mapping

In [46]:
fig = px.choropleth(prov, geojson=geojson, color="Count_log",
                    locations="Province", featureidkey="properties.PROVINCE",
                    hover_name='Count',
                    projection="mercator", 
                    labels={'Count_log': 'log(Count)'}, 
                    title='Number of Covid-19 Confirmed Cases in the Philippines (as of ' + str(asof.date().strftime("%b %d, %Y")) + ")" , 
                    color_continuous_scale=px.colors.sequential.OrRd,  
                    height=1100)
fig.update_layout(hoverlabel=dict(font_size=20, bgcolor='white', bordercolor='red'))
fig.update_geos(fitbounds="locations", visible=False)
fig.show()
In [47]:
df_ncr = df_regres1[df_regres1['ProvRes'] == 'METRO MANILA'].reset_index(drop=True)
df_ncr['CityMunRes'] = df_ncr['CityMunRes'].replace({'QUEZON CITY': 'Quezon City',
                         'CITY OF MANILA': 'Manila', 
                         'CITY OF PARAÑAQUE': 'Parañaque',
                         'CITY OF MAKATI': 'Makati City', 
                         'CITY OF MANDALUYONG': 'Mandaluyong', 
                         'CITY OF PASIG': 'Pasig City', 
                         'TAGUIG CITY': 'Taguig', 
                         'CITY OF SAN JUAN': 'San Juan', 
                         'CALOOCAN CITY': 'Kalookan City',
                         'PASAY CITY': 'Pasay City', 
                         'CITY OF LAS PIÑAS': 'Las Piñas',
                         'CITY OF MUNTINLUPA': 'Muntinlupa', 
                         'CITY OF MARIKINA': 'Marikina', 
                         'CITY OF VALENZUELA': 'Valenzuela', 
                         'CITY OF MALABON': 'Malabon', 
                         'CITY OF NAVOTAS': 'Navotas', 
                         'PATEROS': 'Pateros'})

# CHECKER
z = []
for i in range(0, len(geojson1['features'])):
    y = list(geojson1['features'][i].values())[1]['NAME_2']
    z.append(y)
p = pd.DataFrame(z).rename({0: 'City'}, axis=1)
p

check = pd.merge(df_ncr, p, how='left', left_on='CityMunRes', right_on='City')
# check

Cumulative Cases By Area

Metro Manila, Cebu & Other Provinces (With Most Cases)

In [139]:
print("As of", asof.date().strftime("%b %d, %Y")) 
top = 10
topreg = df_regres.head(top).ProvRes.tolist()

df_regres_daily = covid_ph.groupby(['ProvRes', 'DateRepConf']).count()['CaseCode'].sort_values(ascending=False).reset_index()
df_regres_daily.sort_values(by=['ProvRes','DateRepConf'], inplace=True)
#df_regres_daily['ProvRes'] = df_regres_daily['ProvRes'].replace({'NCR': 'Metropolitan Manila'})
#df_regres_daily['Cum_count'] = df_regres_daily.groupby('ProvRes')['CaseCode'].cumsum()
df_regres_daily = df_regres_daily[df_regres_daily.ProvRes != 'For Validation']

df_regres_daily1 = pd.DataFrame()
for i in topreg:
    a = df_regres_daily[df_regres_daily.ProvRes == i].rename({'CaseCode': 'Count'}, axis=1)
    
    base = pd.DataFrame(covid_ph.DateRepConf.sort_values().unique())
    base.rename({0: 'DateRepConf'}, axis=1, inplace=True)
    # base = base[base.DateRepConf >= '2020-03-06']
    
    q = pd.merge(base, a, how='left', on='DateRepConf')
    q['ProvRes'] = i
    q['Count'] = q.Count.fillna(0)
    q['Cum_count'] = q.Count.cumsum()
#     q['Percent_change'] = q.Cum_count.pct_change()*100
    
    df_regres_daily1 = df_regres_daily1.append(q)

df_regres_daily1

# Figure 1
fig0 = px.line(df_regres_daily1, x='DateRepConf', y='Cum_count', 
               color='ProvRes', labels={'Cum_count': 'Cumulative Count'}, 
               title="Cumulative Case Count on Location with Most Cases")
fig0.update_layout(showlegend=True) 

a = pd.DataFrame(topreg).rename({0: 'RegRes'}, axis=1)
# for i in topreg[:2]:
#     yyy = df_regres_daily1[(df_regres_daily1.ProvRes == i) & (df_regres_daily1.DateRepConf == pd.to_datetime(df_regres_daily1.DateRepConf.iloc[-1]))].Cum_count.values[0]
#     fig0.add_annotation(dict(xref="x", yref="y", showarrow=False,
#                          arrowhead=0, xanchor='left', yanchor='middle',
#                          ax=0, ay=0,
#                          x=df_regres_daily1.DateRepConf.iloc[-1],
#                          y=yyy,
#                          text=" "*1 + str(yyy.astype('int')),
#                          font_color=px.colors.qualitative.Plotly[topreg.index(i)]))
#     fig0.add_annotation(dict(xref="x", yref="y", showarrow=False,
#                          arrowhead=0, xanchor='left', yanchor='middle',
#                          ax=0, ay=0,
#                          x=df_regres_daily1.DateRepConf.iloc[-1],
#                          y=yyy,
#                          text=" "*10 + i, #PLEASE EDIT IF NEEDED
#                          font_color=px.colors.qualitative.Plotly[topreg.index(i)]))
fig0.show()

# Figure 2
fig1 = px.line(df_regres_daily1[~df_regres_daily1.ProvRes.isin(topreg[:2])], x='DateRepConf', y='Cum_count',
               color='ProvRes', labels={'Cum_count': 'Cumulative Count'}, 
               title="Cumulative Case Count on Location with Most Cases (excluding NCR & Cebu Province)", 
               color_discrete_sequence=px.colors.qualitative.Plotly[2:])
fig1.update_layout(showlegend=False) 

a = pd.DataFrame(topreg).rename({0: 'RegRes'}, axis=1)
# for i in topreg[1:2]:
#     yyy = df_regres_daily1[(df_regres_daily1.ProvRes == i) & (df_regres_daily1.DateRepConf == pd.to_datetime(df_regres_daily1.DateRepConf.iloc[-1]))].Cum_count.values[0]
#     fig1.add_annotation(dict(xref="x", yref="y", showarrow=False,
#                          arrowhead=0, xanchor='left', yanchor='middle',
#                          ax=0, ay=0,
#                          x=df_regres_daily1.DateRepConf.iloc[-1],
#                          y=yyy+5,
#                          text=" "*1 + str(yyy.astype('int')),
#                          font_color=px.colors.qualitative.Plotly[topreg.index(i)]))
#     fig1.add_annotation(dict(xref="x", yref="y", showarrow=False,
#                          arrowhead=0, xanchor='left', yanchor='middle',
#                          ax=0, ay=0,
#                          x=df_regres_daily1.DateRepConf.iloc[-1],
#                          y=yyy+5,
#                          text=" "*8 + i,
#                          font_color=px.colors.qualitative.Plotly[topreg.index(i)]))
for i in topreg[2:5]:
    yyy = df_regres_daily1[(df_regres_daily1.ProvRes == i) & (df_regres_daily1.DateRepConf == pd.to_datetime(df_regres_daily1.DateRepConf.iloc[-1]))].Cum_count.values[0]
    fig1.add_annotation(dict(xref="x", yref="y", showarrow=False,
                         arrowhead=0, xanchor='left', yanchor='middle',
                         ax=0, ay=0,
                         x=df_regres_daily1.DateRepConf.iloc[-1],
                         y=yyy,
                         text=" "*1 + str(yyy.astype('int')),
                         font_color=px.colors.qualitative.Plotly[topreg.index(i)]))
    fig1.add_annotation(dict(xref="x", yref="y", showarrow=False,
                         arrowhead=0, xanchor='left', yanchor='middle',
                         ax=0, ay=0,
                         x=df_regres_daily1.DateRepConf.iloc[-1],
                         y=yyy,
                         text=" "*8 + i,
                         font_color=px.colors.qualitative.Plotly[topreg.index(i)]))

for i in topreg[5:6]:
    yyy = df_regres_daily1[(df_regres_daily1.ProvRes == i) & (df_regres_daily1.DateRepConf == pd.to_datetime(df_regres_daily1.DateRepConf.iloc[-1]))].Cum_count.values[0]
    fig1.add_annotation(dict(xref="x", yref="y", showarrow=False,
                         arrowhead=0, xanchor='left', yanchor='middle',
                         ax=0, ay=0,
                         x=df_regres_daily1.DateRepConf.iloc[-1],
                         y=yyy,
                         text=" "*1 + str(yyy.astype('int')),
                         font_color=px.colors.qualitative.Plotly[topreg.index(i)]))
    fig1.add_annotation(dict(xref="x", yref="y", showarrow=False,
                         arrowhead=0, xanchor='left', yanchor='middle',
                         ax=0, ay=0,
                         x=df_regres_daily1.DateRepConf.iloc[-1],
                         y=yyy,
                         text=" "*8 + i,
                         font_color=px.colors.qualitative.Plotly[topreg.index(i)]))
    
for i in topreg[6:7]:
    yyy = df_regres_daily1[(df_regres_daily1.ProvRes == i) & (df_regres_daily1.DateRepConf == pd.to_datetime(df_regres_daily1.DateRepConf.iloc[-1]))].Cum_count.values[0]
    fig1.add_annotation(dict(xref="x", yref="y", showarrow=False,
                         arrowhead=0, xanchor='left', yanchor='middle',
                         ax=0, ay=0,
                         x=df_regres_daily1.DateRepConf.iloc[-1],
                         y=yyy+5,
                         text=" "*1 + str(yyy.astype('int')),
                         font_color=px.colors.qualitative.Plotly[topreg.index(i)]))
    fig1.add_annotation(dict(xref="x", yref="y", showarrow=False,
                         arrowhead=0, xanchor='left', yanchor='middle',
                         ax=0, ay=0,
                         x=df_regres_daily1.DateRepConf.iloc[-1],
                         y=yyy+5,
                         text=" "*8 + i,
                         font_color=px.colors.qualitative.Plotly[topreg.index(i)]))

for i in topreg[7:8]:
    yyy = df_regres_daily1[(df_regres_daily1.ProvRes == i) & (df_regres_daily1.DateRepConf == pd.to_datetime(df_regres_daily1.DateRepConf.iloc[-1]))].Cum_count.values[0]
    fig1.add_annotation(dict(xref="x", yref="y", showarrow=False,
                         arrowhead=0, xanchor='left', yanchor='middle',
                         ax=0, ay=0,
                         x=df_regres_daily1.DateRepConf.iloc[-1],
                         y=yyy,
                         text=" "*1 + str(yyy.astype('int')),
                         font_color=px.colors.qualitative.Plotly[topreg.index(i)]))
    fig1.add_annotation(dict(xref="x", yref="y", showarrow=False,
                         arrowhead=0, xanchor='left', yanchor='middle',
                         ax=0, ay=0,
                         x=df_regres_daily1.DateRepConf.iloc[-1],
                         y=yyy,
                         text=" "*8 + i,
                         font_color=px.colors.qualitative.Plotly[topreg.index(i)]))

for i in topreg[8:9]:
    yyy = df_regres_daily1[(df_regres_daily1.ProvRes == i) & (df_regres_daily1.DateRepConf == pd.to_datetime(df_regres_daily1.DateRepConf.iloc[-1]))].Cum_count.values[0]
    fig1.add_annotation(dict(xref="x", yref="y", showarrow=False,
                         arrowhead=0, xanchor='left', yanchor='middle',
                         ax=0, ay=0,
                         x=df_regres_daily1.DateRepConf.iloc[-1],
                         y=yyy,
                         text=" "*1 + str(yyy.astype('int')),
                         font_color=px.colors.qualitative.Plotly[topreg.index(i)]))
    fig1.add_annotation(dict(xref="x", yref="y", showarrow=False,
                         arrowhead=0, xanchor='left', yanchor='middle',
                         ax=0, ay=0,
                         x=df_regres_daily1.DateRepConf.iloc[-1],
                         y=yyy,
                         text=" "*8 + i,
                         font_color=px.colors.qualitative.Plotly[topreg.index(i)]))
    
for i in topreg[9:10]:
    yyy = df_regres_daily1[(df_regres_daily1.ProvRes == i) & (df_regres_daily1.DateRepConf == pd.to_datetime(df_regres_daily1.DateRepConf.iloc[-1]))].Cum_count.values[0]
    fig1.add_annotation(dict(xref="x", yref="y", showarrow=False,
                         arrowhead=0, xanchor='left', yanchor='middle',
                         ax=0, ay=0,
                         x=df_regres_daily1.DateRepConf.iloc[-1],
                         y=yyy,
                         text=" "*1 + str(yyy.astype('int')),
                         font_color=px.colors.qualitative.Plotly[topreg.index(i)]))
    fig1.add_annotation(dict(xref="x", yref="y", showarrow=False,
                         arrowhead=0, xanchor='left', yanchor='middle',
                         ax=0, ay=0,
                         x=df_regres_daily1.DateRepConf.iloc[-1],
                         y=yyy,
                         text=" "*8 + i,
                         font_color=px.colors.qualitative.Plotly[topreg.index(i)]))       
# fig1.show()
As of May 26, 2020

NCR Cities

Daily Cases By Area

Metro Manila, Cebu & Other Provinces (With Most Cases)

Note: Trendline (in blue) is based on 7-day Moving Average.

In [54]:
py.plot(fig000, filename='daily_cases_Provinces.html', auto_open=False)
Out[54]:
'daily_cases_Provinces.html'
In [55]:
# prov = ['TARLAC PROVINCE']
# fig000 = make_subplots(rows=5, cols=2, subplot_titles=prov)

# for i in prov: # EVEN NUMBER
#     df = df_regres_daily[df_regres_daily.ProvRes == i].reset_index(drop=True)
#     df = df.rename({'CaseCode': 'Count'}, axis=1)
#     cycle, trend1 = sm.tsa.filters.hpfilter(df['Count'], 7)
#     df['Trend_HP'] = trend1
#     df['Trend_MA7'] = df['Count'].rolling(7).mean()
#     df['Trend_MA3'] = df['Count'].rolling(3).mean()
#     df = df[df.DateRepConf >= '2020-03-01']
    
#     fig1 = px.bar(df[df.ProvRes == i], x='DateRepConf', y='Count',
#                   labels={'DateRepConf': 'Date'},
#                   color_discrete_sequence=['lightblue'])
#     fig1_trend = px.line(df[df.ProvRes == i], x='DateRepConf', y='Trend_HP', line_shape='spline', color_discrete_sequence=["blue"])
    
#     fig000.add_trace(fig1.data[0], row=prov[::2].index(i)+1, col=1)
#     fig000.add_trace(fig1_trend.data[0], row=prov[::2].index(i)+1, col=1)

# fig000.update_xaxes(tickfont_size=10, title="Date", titlefont_size=10)
# fig000.update_yaxes(tickfont_size=10, title="Case Count", titlefont_size=10)
# fig000.update_layout(title=dict(x=0.5), showlegend=False, height=2000,
#                      legend=dict(orientation="h", x=-0.05, y=-0.4, yanchor="bottom"), 
#                      hoverlabel=dict(font_size=18))

    
# fig000.show()

NCR Cities

Note: Trendline (in blue) is based on 7-day Moving Average.

In [57]:
fig000 = make_subplots(rows=9, cols=2, subplot_titles=topcity)

for i in topcity[::2]: # ODD NUMBERS
    df = df_city_daily1[df_city_daily1.CityMunRes == i].reset_index(drop=True)
    cycle, trend1 = sm.tsa.filters.hpfilter(df['Count'], 7)
    df['Trend_HP'] = trend1
    df['Trend_MA7'] = df['Count'].rolling(7).mean()
    df['Trend_MA3'] = df['Count'].rolling(3).mean()
    df = df[df.DateRepConf >= '2020-03-01']
    
    fig1 = px.bar(df[df.CityMunRes == i], x='DateRepConf', y='Count',
                  labels={'DateRepConf': 'Date'},
                  color_discrete_sequence=['lightblue'])
    fig1_trend = px.line(df[df.CityMunRes == i], x='DateRepConf', y='Trend_MA7', line_shape='spline', 
                         color_discrete_sequence=["blue"])
    fig000.add_trace(fig1.data[0], row=topcity[::2].index(i)+1, col=1)
    fig000.add_trace(fig1_trend.data[0], row=topcity[::2].index(i)+1, col=1)
    
for i in topcity[1::2]: # EVEN NUMBERS
    df = df_city_daily1[df_city_daily1.CityMunRes == i].reset_index(drop=True)
    cycle, trend1 = sm.tsa.filters.hpfilter(df['Count'], 7)
    df['Trend_HP'] = trend1
    df['Trend_MA7'] = df['Count'].rolling(7).mean()
    df['Trend_MA3'] = df['Count'].rolling(3).mean()
    df = df[df.DateRepConf >= '2020-03-01']
    
    fig1 = px.bar(df[df.CityMunRes == i], x='DateRepConf', y='Count',
                  labels={'DateRepConf': 'Date'},
                  color_discrete_sequence=['lightblue'])
    fig1_trend = px.line(df[df.CityMunRes == i], x='DateRepConf', y='Trend_MA7', line_shape='spline',
                         color_discrete_sequence=["blue"])
    fig000.add_trace(fig1.data[0], row=topcity[1::2].index(i)+1, col=2)
    fig000.add_trace(fig1_trend.data[0], row=topcity[1::2].index(i)+1, col=2)

fig000.update_xaxes(tickfont_size=10, title="Date", titlefont_size=10)
fig000.update_yaxes(tickfont_size=10, title="Case Count", titlefont_size=10)
fig000.update_layout(title=dict(x=0.5, text="COVID-19 DAILY CASES in NCR"), showlegend=False, height=3200,
                     legend=dict(orientation="h", x=-0.05, y=-0.4, yanchor="bottom"), 
                     hoverlabel=dict(font_size=18))

    
fig000.show()
In [58]:
py.plot(fig000, filename='daily_cases_NCR.html', auto_open=False)
Out[58]:
'daily_cases_NCR.html'

Testing Capacity

In [59]:
df_tests_tot = df_tests.groupby('report_date')[['cumulative_unique_individuals',
                                                'cumulative_positive_individuals', 'cumulative_negative_individuals',
                                                'cumulative_samples_tested', 'remaining_available_tests', 'backlogs']].sum()
# df_tests_tot['invalid'] = df_tests_tot['cumulative_unique_individuals'] - (df_tests_tot['cumulative_positive_individuals'] + df_tests_tot['cumulative_negative_individuals'])
# df_tests_tot['%Invalid'] = 100 - (df_tests_tot['%Positive'] + df_tests_tot['%Negative'])
df_tests_tot['%Positive'] = 100*(df_tests_tot['cumulative_positive_individuals'] / df_tests_tot['cumulative_unique_individuals'])
df_tests_tot['%Negative'] = 100*(df_tests_tot['cumulative_negative_individuals'] / df_tests_tot['cumulative_unique_individuals'])
df_tests_tot['Total_Tests_Available'] = df_tests_tot['cumulative_samples_tested'] + df_tests_tot['remaining_available_tests']
df_tests_tot["backlogs"] = df_tests_tot["backlogs"].replace(0.0, '')
df_tests_tot = df_tests_tot.reset_index()
In [60]:
data = pd.melt(df_tests_tot[-1:], id_vars=['report_date'],
               value_vars=['cumulative_samples_tested', 'remaining_available_tests'],
               var_name='Test_Ratio', value_name = 'Count')
data['Test_Ratio'] = data['Test_Ratio'].replace({'remaining_available_tests': 'Remaining Available Tests Supplies', 
                                                 'cumulative_samples_tested': 'Samples Tested'})
data['Percent'] = (data.Count / data.Count.sum())*100
data['Pct'] = round(data['Percent'],1).astype('str') + '%'
data

q = 100*round(df_tests_tot[-1:].iloc[:,1].values[0] / df_tests_tot[-1:].iloc[:,4].values[0],3)
fig = px.bar(data, x='Percent', orientation='h', color='Test_Ratio', 
             text='Count', hover_name='Pct', height=250,
#              color_discrete_sequence=['#66c2a5', '#84dbc0'])
             color_discrete_sequence=['#64B0A2', '#C0D4CD'])
fig.update_layout(legend=dict(orientation='h', title=""), title="Availability of Test Kits")
fig.update_layout(showlegend=False, xaxis=dict(title=None), hoverlabel=dict(font_size=20))

fig.for_each_trace(lambda t: t.update(textfont_size=16))

for i in range(0, len(data.Percent.unique())):
    fig.add_annotation(dict(xref="x", yref="y", showarrow=False,
                            arrowhead=0, xanchor='right', yanchor='middle',
                            ax=0, ay=0,
                            x=data.Percent.cumsum().unique()[i]-0.001, 
                            y=-0.6,
                            text=data.Test_Ratio[i]), font_size=16,
                       font_color = ['#64B0A2', '#C0D4CD'][i])
    fig.add_annotation(dict(xref="x", yref="y", showarrow=False,
                            arrowhead=0, xanchor='right', yanchor='middle',
                            ax=0, ay=0,
                            x=data.Percent.cumsum().unique()[i]-0.001, 
                            y=0.6,
                            text=data.Pct[i]), font_size=16,
                       font_color = ['#64B0A2', '#C0D4CD'][i])
    
print("As of", df_tests.report_date.max().date().strftime("%b %d, %Y"))
fig.update_xaxes(showticklabels=False, showgrid=False, zeroline=False, showline=False)
fig.update_yaxes(showticklabels=False, showgrid=False, zeroline=False, showline=False)
fig.show()
As of May 25, 2020
Note: There are more positive individuals tested vs publicly reported because of undergoing case validation and processing.
In [62]:
df = pd.melt(df_tests_tot, id_vars=['report_date'],
#             value_vars=['%Negative', '%Positive'], 
             value_vars=['%Positive', '%Negative'], 
             var_name = 'Ratio', value_name = 'Percent')
fig = px.bar(df, x='report_date', y='Percent', color='Ratio', 
             color_discrete_sequence=list(reversed(px.colors.qualitative.Plotly[0:2])))
fig.update_layout(title=dict(x=0.5, text="Historical Testing Ratio on Unique Individuals"),
                  xaxis=dict(tickmode='linear', #tick0=4, 
                             dtick=0, tickfont_size=12, title="Date"),
                  yaxis=dict(tickfont_size=10),
                  legend=dict(orientation="h", title="",
                              x=0, y=0.97, 
                              yanchor="bottom"))

fig.update_xaxes(showticklabels=True, showgrid=False, zeroline=False, showline=False)
fig.update_yaxes(showticklabels=True, showgrid=True, zeroline=False, showline=False)
fig.update_layout(hoverlabel=dict(font_size=20), hovermode='x')
fig.show()

print("Note: Testing ratio here is computed based on the total samples tested as of that date")
Note: Testing ratio here is computed based on the total samples tested as of that date
In [63]:
df_tests_tot1 = df_tests.groupby(['report_date', 'facility_name'])[['cumulative_unique_individuals',
                                                'cumulative_positive_individuals', 'cumulative_negative_individuals',
                                                'cumulative_samples_tested', 'remaining_available_tests', 'backlogs']].sum()
df_tests_tot1['%Positive'] = 100*(df_tests_tot1['cumulative_positive_individuals'] / df_tests_tot1['cumulative_unique_individuals'])
df_tests_tot1['%Negative'] = 100*(df_tests_tot1['cumulative_negative_individuals'] / df_tests_tot1['cumulative_unique_individuals'])
df_tests_tot1['Total_Tests_Available'] = df_tests_tot1['cumulative_samples_tested'] + df_tests_tot1['remaining_available_tests']
df_tests_tot1 = df_tests_tot1.reset_index()
df_test_fac = df_tests_tot1[df_tests_tot1.report_date == df_tests_tot1.report_date.max()].sort_values('cumulative_samples_tested', ascending=False).reset_index(drop=True)
df_test_fac['%Total'] = round(df_test_fac['cumulative_unique_individuals'] / df_test_fac['cumulative_unique_individuals'].sum() * 100, 1)
In [64]:
df_test_fac['Facility'] = 'Testing Facility'
fig = px.treemap(df_test_fac,
                 path=['Facility', 'facility_name'],
                 values='cumulative_unique_individuals',
                 hover_name='%Total',
                 title='Number of Samples Tested by Facility',
                 branchvalues='total', height=700)
fig.update_layout(hoverlabel=dict(font_size=18))
fig.show()
*Computed based on the ratio of cumulative positive cases over cumulative conducted tests.
^These are number of specimens submitted in the laboratory with no results released. Backlog data only started on May 19.
In [66]:
fig = make_subplots(specs=[[{"secondary_y": True}]])

fig1 = px.bar(df_tests_tot, x='report_date', 
              # y='UNIQUE INDIVIDUALS TESTED',
              y='NEW UNIQUE TESTS',
              color_discrete_sequence=["#00CC96"])
fig2 = px.line(df_tests_tot, x='report_date', 
               y='NEW %POSITIVE', 
               color_discrete_sequence=["red"])
fig2.data[0].update(mode='markers+lines')

df_tests_tot['TREND_NAME'] = '%Positive Trend (7-day MA)'
fig_trend = px.line(df_tests_tot, x='report_date', 
               y='TREND', color='TREND_NAME',
               color_discrete_sequence=["red"])

fig.add_trace(fig1.data[0], secondary_y=False)
fig.add_trace(fig2.data[0], secondary_y=True)
fig.add_trace(fig_trend.data[0].update(line={'dash': 'dash'}), secondary_y=True)

fig.update_yaxes(title="%Positive", tickfont_size=10, color='red',
                 range=[0, df_tests_tot['%Positive'].max()*1.1], 
                 showgrid=False, secondary_y=True)
fig.update_yaxes(title='No. of Conducted Test', color='#00CC96',
                 #range=[0, df_tests_tot['UNIQUE INDIVIDUALS TESTED'].max()*1.5], 
                 showgrid=True, tickfont_size=10, secondary_y=False)

fig.update_layout(title=dict(x=0.5, text="Estimated %Positive** on Daily Tests"),
                  xaxis=dict(tickmode='linear', #tick0=4,
                             dtick=0, tickfont_size=10, title="Date"),
                  yaxis=dict(dtick=1000), showlegend=True,
                  legend=dict(orientation="h", title="", 
                              x=0.6, y=1, yanchor="bottom"))

# fig.add_annotation(dict(xref="x", yref="y", showarrow=True, arrowhead=1, 
#                         xanchor='left', yanchor='middle', ax=0, ay=0,
#                         x=pd.to_datetime(df_tests_tot.Date.unique().max()), 
#                         y=df_tests_tot['%POSITIVE'][-1:].values[0],
#                         text=" "*2 + round(df_tests_tot['%POSITIVE'][-1:].values[0],1).astype('str')+'%', 
#                         font_size=14, font_color=px.colors.qualitative.Plotly[1]))

fig.show()

print("**Computed based on the ratio of new positive cases reported over new conducted tests on that day.")
**Computed based on the ratio of new positive cases reported over new conducted tests on that day.

Time of Recovery / Death

Most of these death cases probably needs to be validated if caused by Covid-19 before publicly reported.

Facility Capacity

In [69]:
# f.to_excel("./data/ph_list_of_facilities.xlsx")
In [70]:
df_hosp1 = df_hosp.sort_values(by=['hfhudcode', 'reportdate'], ascending=[True, False]).reset_index(drop=True)
df_hosp2 = df_hosp1.groupby('hfhudcode').first().reset_index()
df_hosp2

df_hosp3 = pd.merge(df_hosp2, f.iloc[:, :15], how='left', left_on='hfhudcode', right_on='HealthFacilityCode')

# Categorizing Birthing Home as Infirmary
df_hosp3['Health Facility Type'] = df_hosp3['Health Facility Type'].replace({'Birthing Home': 'Infirmary'})

Total PH

Hospital Data as of 2020-05-25 00:00:00
Note:
* Only the facilities who reported on the DataCollect App. Also, some hospitals & infirmaries don't have updated counts.
* There are 10 ( 0.5% ) facilities with data but no facility tagging from The National Health Facility Registry.
In [72]:
data003 = pd.DataFrame()
for i in ['icu', 'isolbed', 'beds_ward', 'mechvent']:
    data3 = df_hosp3.groupby(['Ownership Major Classification', 
                              'Health Facility Type'])[[i+'_v', 
                                                        i+'_o']].sum().reset_index().rename({'Ownership Major Classification': 'OwnershipType', 
                                                                                             'Health Facility Type': 'FacilityType'}, axis=1)
    data03 = pd.melt(data3, id_vars=['OwnershipType', 'FacilityType'], 
                     value_vars=[i+'_v', i+'_o'], var_name=['Availability'], 
                     value_name='Count')
    data03['Availability'] = data03['Availability'].replace({i+'_v': 'vacant', i+'_o': 'occupied'})
    data03['Type'] = i
    data003 = data003.append(data03)
    
data003['Type'] = data003['Type'].replace({'icu': 'ICU Beds', 'isolbed': 'Isolation Beds', 
                                           'beds_ward': 'Ward Beds', 'mechvent': 'Mechanical Ventilators'})
data003 = data003.reset_index(drop=True)
data003['Percent'] = data003.groupby(['Type','OwnershipType','FacilityType'])['Count'].apply(lambda x: 100 * x / float(x.sum())).values
data003['Pct'] = round(data003['Percent'],1).astype('str') + '%'
data003['Type'] = data003['Type'].replace({'Mechanical Ventilators': 'Mech Vents'})
data003

##############################################################################

# TOTAL
data100 = data003.groupby(['Type', 'Availability'])['Count'].sum().reset_index()
data100['Percent'] = data100.groupby(['Type'])['Count'].apply(lambda x: 100 * x / float(x.sum())).values
data100['Pct'] = round(data100['Percent'],1).astype('str') + '%'
x = data100[data100.Type.isin(['ICU Beds', 'Isolation Beds', 'Ward Beds'])]
y = data100[data100.Type.isin(['Mech Vents'])]
data100 = pd.concat([x, y])
data100 = data100.sort_values(by='Availability', ascending=False)

fig = px.bar(data100, x='Type', y='Percent', orientation='v', color='Availability', 
             text='Pct', hover_name='Count')
fig.update_layout(legend=dict(orientation='h', x=0.01, y=-0.1, title="", font_size=16), 
                  hoverlabel=dict(font_size=20),
                  title=("Availability of Beds & Mechanical Ventilators in <b>Total PH Facilities</b>"))
fig.for_each_trace(lambda t: t.update(textfont_color='white', textfont_size=18))
fig.update_xaxes(title=None, showticklabels=True, showgrid=False, zeroline=False, showline=False, side='bottom', tickfont_size=16)
fig.update_yaxes(title=None, showticklabels=False, showgrid=False, zeroline=False, showline=False)
fig.update_layout(shapes=[dict(type= 'line', yref= 'paper', y0= -0.05, y1= 0.95, xref= 'x', x0= 2.5, x1= 2.5, 
                               line=dict(color="gray", width=1, dash="dash"))])
for i in range(0, len(data003.Type.unique())):
    fig.add_annotation(dict(xref="x", yref="y", showarrow=False,
                            arrowhead=0, xanchor='center', yanchor='middle',
                            ax=0, ay=0,
                            x=i, 
                            y=-5,
                            text="Count: " + str(data003.groupby('Type').sum().Count[i])), font_size=12, font_color = "gray")
fig.show()

print("Note: These are beds dedicted exclusively for COVID-19 cases. Please interpret with caution, accuracy of data is not assured.")
Note: These are beds dedicted exclusively for COVID-19 cases. Please interpret with caution, accuracy of data is not assured.
Breakdown by Facility Type
In [73]:
# GOVERNMENT - HOSPITAL
data0011 = data003[(data003['OwnershipType'] == 'Government') & (data003['FacilityType'] == 'Hospital')].reset_index(drop=True)
fig3 = px.bar(data0011, x='Type', y='Percent', orientation='v', color='Availability', 
             text='Pct', hover_name='Count', hover_data=None)
fig3.update_layout(legend=dict(orientation='h', x=0.01, y=-0.05, title=""), 
                  hoverlabel=dict(font_size=20), 
                  title=("Availability of Beds & Mechanical Vents in <b>Government Facilities</b>"))
fig3.for_each_trace(lambda t: t.update(textfont_color='white', textfont_size=12))
fig3.update_xaxes(showticklabels=True, showgrid=False, zeroline=False, showline=False, side='top', tickfont_size=14)
fig3.update_yaxes(showticklabels=False, showgrid=False, zeroline=False, showline=False)
# fig3.show()

# GOVERNMENT - INFIRMARIES
data0011 = data003[(data003['OwnershipType'] == 'Government') & (data003['FacilityType'] == 'Infirmary')].reset_index(drop=True)
fig4 = px.bar(data0011, x='Type', y='Percent', orientation='v', color='Availability', 
             text='Pct', hover_name='Count', hover_data=None)
fig4.update_layout(legend=dict(orientation='h', x=0.01, y=-0.05, title=""), 
                  hoverlabel=dict(font_size=20), 
                  title=("Availability of Beds & Mechanical Vents in <b>Private Facilities</b>"))
fig4.for_each_trace(lambda t: t.update(textfont_color='white', textfont_size=12))
fig4.update_xaxes(showticklabels=True, showgrid=False, zeroline=False, showline=False, side='top', tickfont_size=14)
fig4.update_yaxes(showticklabels=False, showgrid=False, zeroline=False, showline=False)
# fig4.show()

# PRIVATE - HOSPITAL 
data0022 = data003[(data003['OwnershipType'] == 'Private') & (data003['FacilityType'] == 'Hospital')].reset_index(drop=True)
fig5 = px.bar(data0022, x='Type', y='Percent', orientation='v', color='Availability', 
             text='Pct', hover_name='Count', hover_data=None)
fig5.update_layout(legend=dict(orientation='h', x=0.01, y=-0.05, title=""), 
                  hoverlabel=dict(font_size=20), 
                  title=("Availability of Beds & Mechanical Vents in <b>Government Facilities</b>"))
fig5.for_each_trace(lambda t: t.update(textfont_color='white', textfont_size=12))
fig5.update_xaxes(showticklabels=True, showgrid=False, zeroline=False, showline=False, side='top', tickfont_size=14)
fig5.update_yaxes(showticklabels=False, showgrid=False, zeroline=False, showline=False)
# fig5.show()

# PRIVATE -INFIRMARIES
data0022 = data003[(data003['OwnershipType'] == 'Private') & (data003['FacilityType'] == 'Infirmary')].reset_index(drop=True)
fig6 = px.bar(data0022, x='Type', y='Percent', orientation='v', color='Availability', 
             text='Pct', hover_name='Count', hover_data=None)
fig6.update_layout(legend=dict(orientation='h', x=0.01, y=-0.05, title=""), 
                  hoverlabel=dict(font_size=20), 
                  title=("Availability of Beds & Mechanical Vents in <b>Private Facilities</b>"))
fig6.for_each_trace(lambda t: t.update(textfont_color='white', textfont_size=12))
fig6.update_xaxes(showticklabels=True, showgrid=False, zeroline=False, showline=False, side='top', tickfont_size=14)
fig6.update_yaxes(showticklabels=False, showgrid=False, zeroline=False, showline=False)
# fig6.show()

# SUBPLOTS
fig000 = make_subplots(rows=2, cols=2, subplot_titles=("Government Hospitals", "Government Infirmaries", 
                                                       "Private Hospitals", "Private Infirmaries"))
for loop in range(0, 2):
    fig000.add_trace(fig3.data[loop], row=1, col=1)
    fig000.add_trace(fig4.data[loop], row=1, col=2)
    fig000.add_trace(fig5.data[loop], row=2, col=1)
    fig000.add_trace(fig6.data[loop], row=2, col=2)
fig000.update_layout(barmode='stack', 
                     xaxis1=dict(tickmode='linear', dtick=0, tickfont_size=12),
                     xaxis2=dict(tickmode='linear', dtick=0, tickfont_size=12))
fig000.update_xaxes(title=None, side='bottom', tickmode='array', ticktext=['a', 'b', 'c', 'd'])
fig000.update_yaxes(title=None, showticklabels=False, showgrid=False, 
                    zeroline=False, showline=False)
fig000.update_layout(title=dict(x=0.5), showlegend=False, height=800,
                     legend=dict(orientation="h", x=-0.05, y=-0.4,
                                 yanchor="bottom"), hoverlabel=dict(font_size=20), 
                     xaxis_tickformat='<br>')
for i in range(0, len(data003.Type.unique())):
    fig000.add_annotation(dict(xref="x1", yref="y1", showarrow=False,
                               arrowhead=0, xanchor='center', yanchor='middle',
                               ax=0, ay=0, x=i, y=-5,
                               text="Count: " + str(int(fig3.data[0]['hovertext'][i]+fig3.data[1]['hovertext'][i])), 
                               font_size=11, font_color = "gray"))
    fig000.add_annotation(dict(xref="x2", yref="y2", showarrow=False,
                               arrowhead=0, xanchor='center', yanchor='middle',
                               ax=0, ay=0, x=i, y=-5,
                               text="Count: " + str(int(fig4.data[0]['hovertext'][i]+fig4.data[1]['hovertext'][i])), 
                               font_size=11, font_color = "gray"))
    fig000.add_annotation(dict(xref="x3", yref="y3", showarrow=False,
                               arrowhead=0, xanchor='center', yanchor='middle',
                               ax=0, ay=0, x=i, y=-5,
                               text="Count: " + str(int(fig5.data[0]['hovertext'][i]+fig5.data[1]['hovertext'][i])), 
                               font_size=11, font_color = "gray"))
    fig000.add_annotation(dict(xref="x4", yref="y4", showarrow=False,
                               arrowhead=0, xanchor='center', yanchor='middle',
                               ax=0, ay=0, x=i, y=-5,
                               text="Count: " + str(int(fig6.data[0]['hovertext'][i]+fig6.data[1]['hovertext'][i])), 
                               font_size=11, font_color = "gray"))

fig000.show()

print("Note: The graphs above only shows utilization in facilities that have submitted to the DOHDataCollect App only.")
Note: The graphs above only shows utilization in facilities that have submitted to the DOHDataCollect App only.

In NCR Only

Note: The graphs above only shows utilization in facilities that have submitted to the DOHDataCollect App only.

R(t) Estimation using Bayesian

Main Reference: Kevin Systrom

In any epidemic, $R_t$ is the measure known as the effective reproduction number. It can be simply defined as the number of people who can become infected per infectious person at time $t$. The most well-known version of this number is the basic reproduction number: $R_0$ when $t=0$. However, $R_0$ is a single measure that does not adapt with changes in behavior and restrictions.

As a pandemic evolves, increasing restrictions (or potential releasing of restrictions) changes $R_t$. Knowing the current $R_t$ is essential. When $R\gg1$, the pandemic will spread through a large part of the population. If $R_t$ $<1$, the pandemic will slow quickly before it has a chance to infect many people. The lower the $R_t$: the more manageable the situation.

Given this, what we want to see is $R_t$ of less than 1 which means that the pandemic in an area is under control.

In [77]:
def prepare_cases(cases):
    new_cases = cases.diff()

    smoothed = new_cases.rolling(7,
        win_type='gaussian',
        min_periods=1,
        center=True).mean(std=2).round()
    
    zeros = smoothed.index[smoothed.eq(0)]
    if len(zeros) == 0:
        idx_start = 0
    else:
        last_zero = zeros.max()
        idx_start = smoothed.index.get_loc(last_zero) + 1
    smoothed = smoothed.iloc[idx_start:]
    original = new_cases.loc[smoothed.index]
    
    return original, smoothed

Note that we will start the computation on March 5, 2020 - the date of the last zero new case. To derive the trend, Gaussian filter was applied to arrive with a smoothed time series. This will be the basis in calculating for each days' likelihood and posteriors as part of the Bayesian approach in estimating $R_t$. To know more about the methodology, you may check out the work of Kevin Systrom.

In [79]:
from scipy import stats as sps

def get_posteriors(sr, sigma=0.15):

    # (1) Calculate Lambda
    lam = sr[:-1].values * np.exp(GAMMA * (r_t_range[:, None] - 1))

    # (2) Calculate each day's likelihood
    likelihoods = pd.DataFrame(
        data = sps.poisson.pmf(sr[1:].values, lam),
        index = r_t_range,
        columns = sr.index[1:])
    
    # (3) Create the Gaussian Matrix
    process_matrix = sps.norm(loc=r_t_range,
                              scale=sigma
                             ).pdf(r_t_range[:, None]) 

    # (3a) Normalize all rows to sum to 1
    process_matrix /= process_matrix.sum(axis=0)
    
    # (4) Calculate the initial prior
    prior0 = sps.gamma(a=4).pdf(r_t_range)
    prior0 /= prior0.sum()

    # Create a DataFrame that will hold our posteriors for each day
    # Insert our prior as the first posterior.
    posteriors = pd.DataFrame(
        index=r_t_range,
        columns=sr.index,
        data={sr.index[0]: prior0}
    )
    
    # We said we'd keep track of the sum of the log of the probability
    # of the data for maximum likelihood calculation.
    log_likelihood = 0.0

    # (5) Iteratively apply Bayes' rule
    for previous_day, current_day in zip(sr.index[:-1], sr.index[1:]):

        #(5a) Calculate the new prior
        current_prior = process_matrix @ posteriors[previous_day]
        
        #(5b) Calculate the numerator of Bayes' Rule: P(k|R_t)P(R_t)
        numerator = likelihoods[current_day] * current_prior
        
        #(5c) Calcluate the denominator of Bayes' Rule P(k)
        denominator = np.sum(numerator)
        
        # Execute full Bayes' Rule
        posteriors[current_day] = numerator/denominator
        
        # Add to the running sum of log likelihoods
        log_likelihood += np.log(denominator)
    
    return posteriors, log_likelihood

# We create an array for every possible value of Rt
R_T_MAX = 12
r_t_range = np.linspace(0, R_T_MAX, R_T_MAX*100+1)

# Gamma is 1/serial interval
# https://wwwnc.cdc.gov/eid/article/26/7/20-0282_article
# https://www.nejm.org/doi/full/10.1056/NEJMoa2001316
GAMMA = 1/7

# Note that we're fixing sigma to a value just for the example
posteriors, log_likelihood = get_posteriors(smoothed, sigma=.15)

Next, since the results include uncertainty, we'd view the most likely value of $R_t$ along with its highest-density interval (HDI). HDI simply indicates which points of a distribution are most credible, and which cover most of the distribution. It can be compared to a confidence interval where an estimate will mostly like falls on a range given a probability ($p$). For this analysis, we will be defining the High HDI and Low HDI given $p=0.9$.

In [81]:
def highest_density_interval(pmf, p=.9):
    # If we pass a DataFrame, just call this recursively on the columns
    if(isinstance(pmf, pd.DataFrame)):
        return pd.DataFrame([highest_density_interval(pmf[col], p=p) for col in pmf],
                            index=pmf.columns)
    
    cumsum = np.cumsum(pmf.values)
    best = None
    for i, value in enumerate(cumsum):
        for j, high_value in enumerate(cumsum[i+1:]):
            if (high_value-value > p) and (not best or j<best[1]-best[0]):
                best = (i, i+j+1)
                break
            
    low = pmf.index[best[0]]
    high = pmf.index[best[1]]
    return pd.Series([low, high], index=[f'Low_{p*100:.0f}', f'High_{p*100:.0f}'])
In [85]:
# results_prov = pd.DataFrame()

# for i in topreg[:7]:
#     df0 = df_regres_daily1[df_regres_daily1['ProvRes'] == i] 
#     df = df0[['DateRepConf', 'Cum_count']][df0.DateRepConf >= '2020-03-05']
#     df1 = df.set_index('DateRepConf')
#     df1 = df1.iloc[:,0].astype('int64')
    
#     idx = pd.date_range('2020-03-05', df1.index[-1].date())
#     df1.index = pd.DatetimeIndex(df1.index)
#     df1 = df1.reindex(idx, fill_value=0)
#     df1

#     original, smoothed = prepare_cases(df1)

#     posteriors, log_likelihood = get_posteriors(smoothed, sigma=.15)

#     # Note that this takes a while to execute - it's not the most efficient algorithm
#     hdis = highest_density_interval(posteriors, p=.9)
#     most_likely = posteriors.idxmax().rename('Estimated Rt')

#     # Look into why you shift -1
#     result_prov = pd.concat([most_likely, hdis], axis=1)
#     result_prov['ProvRes'] = i
#     results_prov = results_prov.append(result_prov)
In [87]:
# def prepare_cases(cases):
#     new_cases = cases.diff()

#     smoothed = new_cases.rolling(7,
#         win_type='gaussian',
#         min_periods=1,
#         center=True).mean(std=2).round()
    
#     zeros = smoothed.index[smoothed.eq(0)]
#     if len(zeros) == 0:
#         idx_start = 0
#     else:
#         last_zero = zeros.max()
#         idx_start = smoothed.index.get_loc(last_zero) + 1
#     smoothed = smoothed.iloc[idx_start:]
#     original = new_cases.loc[smoothed.index]
    
#     return original, smoothed
In [88]:
# results_ncr = pd.DataFrame()
# for i in topcity[:-4]:
#     df0 = df_city_daily1[df_city_daily1['CityMunRes'] == i] 
#     df = df0[['DateRepConf', 'Cum_count']][df0.DateRepConf >= '2020-03-05']
#     df1 = df.set_index('DateRepConf')
#     df1 = df1.iloc[:,0].astype('int64')
    
#     idx = pd.date_range('2020-03-05', df1.index[-1].date())
#     df1.index = pd.DatetimeIndex(df1.index)
#     df1 = df1.reindex(idx, fill_value=0)
#     df1

#     original, smoothed = prepare_cases(df1)

#     posteriors, log_likelihood = get_posteriors(smoothed, sigma=.15)

#     # Note that this takes a while to execute - it's not the most efficient algorithm
#     hdis = highest_density_interval(posteriors, p=.9)
#     most_likely = posteriors.idxmax().rename('Estimated Rt')

#     # Look into why you shift -1
#     result_ncr = pd.concat([most_likely, hdis], axis=1)
#     result_ncr['CityMunRes'] = i
#     results_ncr = results_ncr.append(result_ncr)

Log-Ratio: New vs Total Cases

The growth started to slow down during the first week of April & currently moving (fluctuating) sideways. Yes, it started to plateau but what we need is a downtrend / sudden drop to achieve a "flattening curve".

Doubling Time

Doubling time is the defined as the length of time for the present value of variable to double given the prevailing rate of increase. It is based on the compound accumulation model. Here's the formula for doubling time ${d_{t,y}}$:

${d_{t,y}}$ = $\frac{ln (2)}{ln(1+P_{t,y})}$

where $P_{t,y}$ is the percent change of a value (i.e. Covid-19 total cases) at any time ${t}$

Source: PJ Cayton and JG Sarmiento

In [91]:
# "Epidemic doubling times characterize the sequence of intervals at which the cumulative incidence doubles (3). If an epidemic is growing exponentially with a constant growth rate ${r}$, the doubling time remains constant and equals $\frac{ln 2}{r}$. An increase in the doubling time indicates a slowdown in transmission if the underlying reporting rate remains unchanged." 
# Source: https://wwwnc.cdc.gov/eid/article/26/8/20-0219_article

# To calculate the doubling time for a population (e.g. COVID-19 cases), use the formula = $\frac{x ln(2)}{ln \frac{y}{z}$, where

# - x is the time that has passed since you started measuring. For example, if the number of cases went from 500 on day 0 to 1000 on day 2, x is 2.
# - y is the number of cases on day x, e.g 1000 on day 2.
# - z is the number of cases on day 0, e.g. 500."

# Source: https://blog.datawrapper.de/weekly-chart-coronavirus-doublingtimes/
In [92]:
# # version1
# c2 = c1.copy()
# c2['doubling_time1'] = np.log(2) / (c2['%Change']/100)
# c2['doubling_time2'] = np.log(2) / (c2['trend']/100)

# # version2
# c2 = c1.copy()
# c2['doubling_time1'] = np.log10(2) / (c2['%Change']/100)
# c2['doubling_time2'] = np.log10(2) / (c2['trend']/100)

# version3
c2 = c1.copy()
c2['doubling_time1'] = np.log(2) / np.log(1 + (c2['%Change']/100))
c2['doubling_time2'] = np.log(2) / np.log(1 + (c2['trend']/100))

# version3.1
# c2['doubling_time3'] = c2['doubling_time1'].rolling(7).mean()
# cyclex, trendx = sm.tsa.filters.hpfilter(c2['doubling_time1'], 7)
# c2['doubling_time4'] = trendx

c2['Days'] = 'Doubling Time'
c2['Trend_name1'] = 'Trend (based on HP Filter)'
fig_trend1 = px.line(c2, x='Date', y='doubling_time2', color='Trend_name1', 
                     color_discrete_sequence=["cadetblue"], 
                     labels={'doubling_time2': 'Days-To-Double'})
# c2['Trend_name2'] = 'Trend (based on 7-day rolling mean)'
# fig_trend2 = px.line(c2, x='Date', y='doubling_time4', color='Trend_name2', 
#                      color_discrete_sequence=["cadetblue"])

fig2 = px.line(c2, x='Date', y='doubling_time1', color='Days',
               labels={'doubling_time1': 'Days-To-Double'},
               color_discrete_sequence=["lightblue"])

fig2.add_trace(fig_trend1.data[0].update(line={'dash': 'dash'}))
# fig2.add_trace(fig_trend2.data[0].update(line={'dash': 'dashdot'}))

fig2.update_yaxes(title="Days-to-Double", showgrid=True, tickfont_size=15)
fig2.update_layout(title=dict(x=0.5, text="Doubling Time in Confirmed Cases for PH"),
                   xaxis=dict(tickmode='array', #tick0=4, dtick=0, 
                              tickfont_size=9, title="Date", showgrid=False),
                   legend=dict(orientation="v", title="",
                              x=0.1, y=0.85, yanchor="bottom"), 
                   hoverlabel=dict(font_size=20))
fig2.update_layout(shapes=[dict(type='line', 
                                yref='y', y0=30, y1=30, 
                                xref='paper', x0=0, x1=1, 
                                line=dict(color="orange", width=1))])
fig2.show()

PH vs ASEAN Countries

Data Source: https://www.worldometers.info/coronavirus/

WORLDOMETER Last download:  May 28, 2020 05:58:14
In [98]:
figq = px.bar(df_asean[::-1], x='TotalCases', y='Country,Other',
              orientation='h', text='TotalCases', hover_data=['Pop'],
              labels={'Country,Other': 'Country'}, #hover_data=['ActiveCases'],
              color_discrete_sequence=[px.colors.qualitative.Plotly[0]])

figq.update_traces(textposition='outside', textfont_size=12)
figq.update_yaxes(tickfont_size=15)
figq.update_xaxes(range=(0, df_asean['TotalCases'].max()*1.1))
figq.update_layout(xaxis=dict(tickfont_size=9, title='Number of Cases'),
                   yaxis=dict(title=None),
                   title=dict(x=0.5, text="Covid-19 Confirmed Cases in ASEAN Region"),
                   # legend=dict(orientation="v", title="", x=0.05, y=0.75, yanchor="bottom"), 
                   hoverlabel=dict(font_size=20))

figq.show()

#############################################################################
#############################################################################
#############################################################################

figA = px.bar(df_asean[::-1], x='%Active', y='Country,Other',
              orientation='h', text='%Active', 
              labels={'Country,Other': 'Country'}, hover_data=['ActiveCases'],
              color_discrete_sequence=[px.colors.qualitative.Plotly[0]])

figA.update_traces(textposition='outside', textfont_size=12)
figA.update_layout(xaxis=dict(tickfont_size=9),
                  #title=dict(x=0.5, text="PH Covid-19 Daily Confirmed Cases"),
                  #legend=dict(orientation="v", title="", x=0.05, y=0.75, yanchor="bottom"), 
                  hoverlabel=dict(font_size=20))
# figA.show()

figB = px.bar(df_asean[::-1], x='%Death', y='Country,Other',
              orientation='h', text='%Death', 
              labels={'Country,Other': 'Country'}, hover_data=['TotalDeaths'],
              color_discrete_sequence=[px.colors.qualitative.Plotly[1]])

figB.update_traces(textposition='outside', textfont_size=12)
figB.update_layout(xaxis=dict(tickfont_size=9),
                  #title=dict(x=0.5, text="PH Covid-19 Daily Confirmed Cases"),
                  #legend=dict(orientation="v", title="", x=0.05, y=0.75, yanchor="bottom"), 
                  hoverlabel=dict(font_size=20))
# figA.show()

figC = px.bar(df_asean[::-1], x='%Recovered', y='Country,Other',
              orientation='h', text='%Recovered', 
              labels={'Country,Other': 'Country'}, hover_data=['TotalRecovered'],
              color_discrete_sequence=[px.colors.qualitative.Plotly[2]])

figC.update_traces(textposition='outside', textfont_size=12)
figC.update_layout(xaxis=dict(tickfont_size=9),
                  #title=dict(x=0.5, text="PH Covid-19 Daily Confirmed Cases"),
                  #legend=dict(orientation="v", title="", x=0.05, y=0.75, yanchor="bottom"), 
                  hoverlabel=dict(font_size=20))
# figC.show()

##############################################################################

fig = px.bar(df_asean[::-1], x='Tot\xa0Cases/1M pop', y='Country,Other',
             orientation='h', text='Tot\xa0Cases/1M pop', labels={'Country,Other': 'Country'}, 
             hover_name='Tot\xa0Cases/1M pop', hover_data=['TotalCases', 'Pop'], 
             color_discrete_sequence=[px.colors.qualitative.Plotly[0]])

fig.update_traces(textposition='outside', textfont_size=12)
fig.update_layout(xaxis=dict(tickfont_size=9),
                  #title=dict(x=0.5, text="PH Covid-19 Daily Confirmed Cases"),
                  #legend=dict(orientation="v", title="", x=0.05, y=0.75, yanchor="bottom"), 
                  hoverlabel=dict(font_size=20))
# fig.show()

fig1 = px.bar(df_asean[::-1], x='Deaths/1M pop', y='Country,Other', 
              orientation='h', text='Deaths/1M pop', labels={'Country,Other': 'Country'}, 
              hover_name='Deaths/1M pop', hover_data=['TotalDeaths', 'Pop'], 
              color_discrete_sequence=[px.colors.qualitative.Plotly[1]])

fig1.update_traces(textposition='outside', textfont_size=12)
fig1.update_layout(xaxis=dict(tickfont_size=9),
                  #title=dict(x=0.5, text="PH Covid-19 Daily Confirmed Cases"),
                  #legend=dict(orientation="v", title="", x=0.05, y=0.75, yanchor="bottom"), 
                  hoverlabel=dict(font_size=20))
#fig1.show()

fig2 = px.bar(df_asean[::-1], x='Tests/ 1M pop', y='Country,Other', 
              orientation='h', text='Tests/ 1M pop', labels={'Country,Other': 'Country'},
              hover_name='Tests/ 1M pop', hover_data=['TotalTests', 'Pop'], 
              color_discrete_sequence=[px.colors.qualitative.Plotly[2]])

fig2.update_traces(textposition='outside', textfont_size=12)
fig2.update_layout(xaxis=dict(tickfont_size=9),
                  #title=dict(x=0.5, text="PH Covid-19 Daily Confirmed Cases"),
                  #legend=dict(orientation="v", title="", x=0.05, y=0.75, yanchor="bottom"), 
                  hoverlabel=dict(font_size=20))
#fig2.show()

##############################################################################
##############################################################################
##############################################################################

fig000 = make_subplots(rows=2, cols=3, subplot_titles=("%Active Cases", "%Deaths", "%Recovered", 
                                                       "Cases per 1M Pop'n", "Deaths per 1M Pop'n", "Test per 1M Pop'n"))

# FigA
fig000.add_trace(figA.data[0], row=1, col=1)
fig000.update_yaxes(tickfont_size=15, row=1, col=1)
fig000.update_xaxes(range=(0, df_asean['%Active'].max()*1.25), title='Percent(%)', row=1, col=1)
# fig000.update_xaxes(range=(0, 120), row=1, col=1)

# FigB
fig000.add_trace(figB.data[0], row=1, col=2)
fig000.update_yaxes(showticklabels=False, row=1, col=2)
fig000.update_xaxes(range=(0, df_asean['%Death'].max()*1.5), title='Percent(%)', row=1, col=2)
# fig000.update_xaxes(range=(0, 120), row=1, col=2)

# FigC
fig000.add_trace(figC.data[0], row=1, col=3)
fig000.update_yaxes(showticklabels=False, row=1, col=3)
fig000.update_xaxes(range=(0, df_asean['%Recovered'].max()*1.25), title='Percent(%)', row=1, col=3)
# fig000.update_xaxes(range=(0, 120), row=1, col=3)

##############################################################################
# Fig
fig000.add_trace(fig.data[0], row=2, col=1)
fig000.update_yaxes(tickfont_size=15, row=2, col=1)
fig000.update_xaxes(range=(0, df_asean['Tot\xa0Cases/1M pop'].max()*1.25), title='Count', row=2, col=1)

# Fig1
fig000.add_trace(fig1.data[0], row=2, col=2)
fig000.update_yaxes(showticklabels=False, row=2, col=2)
fig000.update_xaxes(range=(0, df_asean['Deaths/1M pop'].max()*1.25), title='Count', row=2, col=2)

# Fig2
fig000.add_trace(fig2.data[0], row=2, col=3)
fig000.update_yaxes(showticklabels=False, row=2, col=3)
fig000.update_xaxes(range=(0, df_asean['Tests/ 1M pop'].max()*1.25), title='Count', row=2, col=3)

fig000.update_layout(hoverlabel=dict(font_size=20), height=1000)

fig000.show()

Epidepic Modeling using SIR

SIR model is a kind of compartmental model describing the dynamics of infectious disease. You may wonder why it is called the “compartmental model.” The model divides the population into compartments. Each compartment is expected to have the same characteristics. SIR represents the three compartments segmented by the model.

  1. Susceptible is a group of people who are vulnerable to exposure with infectious people. They can be patient when the infection happens.
  2. The group of Infectious represents the infected people. They can pass the disease to susceptible people and can be recovered in a specific period.
  3. Recovered people get immunity so that they are not susceptible to the same illness anymore. SIR model is a framework describing how the number of people in each group can change over time.

Source: https://www.lewuathe.com/covid-19-dynamics-with-sir-model.html

In [99]:
#!/usr/bin/python
import numpy as np
import pandas as pd
from csv import reader
from csv import writer
from scipy.integrate import solve_ivp
from scipy.optimize import minimize
import matplotlib.pyplot as plt
from datetime import timedelta, datetime
import argparse
import sys
import json
import ssl
import urllib.request
In [100]:
# DATA FROM JHU SSE

df_confirmed = pd.read_csv('https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv')
df_deaths = pd.read_csv('https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_global.csv')
df_recovered = pd.read_csv('https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_recovered_global.csv')

country_df_confirmed = df_confirmed[df_confirmed['Country/Region'] == 'Philippines']
country_df_deaths = df_deaths[df_deaths['Country/Region'] == 'Philippines']
country_df_recovered = df_recovered[df_recovered['Country/Region'] == 'Philippines']
In [101]:
start_date = '2/1/20' # First Case

recovered = country_df_recovered.iloc[0].loc[start_date:]
death = country_df_deaths.iloc[0].loc[start_date:]
confirmed = country_df_confirmed.iloc[0].loc[start_date:]
data = confirmed - recovered - death

First, let's set the initial parameters. For the sake of this modeling effort, we will just play with estimates.
Let's say $S_0$ = 100000 which corresponds to the initial susceptible. Meanwhile, we will set $I_0$ = 1 (initial infected) and $R_0$ = 0 (initial recovered), same as the PH statistics we have as of January 30, 2020.

In [103]:
print('Actual data as of:', values[-1])
Actual data as of: 5/27/20
In [104]:
while len(values) < new_size:
    current = current + timedelta(days=1)
    values = np.append(values, datetime.strftime(current, '%m/%d/%y'))
    
new_index = values
size = len(new_index)
print('Forecast until', '(+'+str(dayforecast)+' days):', new_index[-1])
Forecast until (+60 days): 07/26/20
In [105]:
# loss Function based on root MSE
def loss(point, data, recovered, s_0, i_0, r_0):
    size = len(data)
    beta, gamma = point
    
    def SIR(t, y):
        S = y[0]
        I = y[1]
        R = y[2]
        return [-beta*S*I, beta*S*I-gamma*I, gamma*I]
    
    solution = solve_ivp(SIR, [0, size], [s_0, i_0, r_0], t_eval=np.arange(0, size, 1), vectorized=True)
    l1 = np.sqrt(np.mean((solution.y[1] - data)**2))
    l2 = np.sqrt(np.mean((solution.y[2] - recovered)**2))
    alpha = 0.1
    return alpha * l1 + (1 - alpha) * l2

# SIR Model
def SIR(t, y):
    S = y[0]
    I = y[1]
    R = y[2]
    return [-beta*S*I, beta*S*I-gamma*I, gamma*I]

Next, find the values of beta (β) and gamma (γ) that give the smallest residual sum of squares (RSS), which represents the best fit to the data. β is the transmissivity rate or the parameter controlling how much the disease can be transmitted through exposure. It is determined by the chance of contact and the probability of disease transmission. γ is the recovery rate or the parameter expressing how much the disease can be recovered in a specific period.

In [106]:
# Optimizate to estimate the beta and gamma fitting the given confirmed cases
optimal = minimize(loss, [0.001, 0.001], args=(data, recovered, s_0, i_0, r_0), 
                   method='L-BFGS-B', bounds=[(0.00000001, 0.4), (0.00000001, 0.4)])
print(optimal)
      fun: 582.9738391310784
 hess_inv: <2x2 LbfgsInvHessProduct with dtype=float64>
      jac: array([ 1.29773984e+07, -5.04369609e+01])
  message: b'CONVERGENCE: REL_REDUCTION_OF_F_<=_FACTR*EPSMCH'
     nfev: 234
      nit: 11
   status: 0
  success: True
        x: array([1.05462258e-06, 1.84924871e-02])
In [107]:
beta, gamma = optimal.x
print('β: ', beta)
print('γ: ', gamma)
β:  1.0546225784657421e-06
γ:  0.018492487104844316
In [108]:
# Predict Future Values Based on SIR Model
new_index = values
size = len(new_index)
extended_actual = np.concatenate((data.values, [None] * (size - len(data.values))))
extended_recovered = np.concatenate((recovered.values, [None] * (size - len(recovered.values))))
extended_death = np.concatenate((death.values, [None] * (size - len(death.values))))
prediction = solve_ivp(SIR, [0, size], [s_0, i_0, r_0], t_eval=np.arange(0, size, 1))
In [109]:
# Plot the graph
df = pd.DataFrame({'Active Cases (Infected)': extended_actual, 'Recovered Cases': extended_recovered, 'Death Cases': extended_death, 
                   'Susceptible': prediction.y[0], 'Infected': prediction.y[1], 'Recovered': prediction.y[2]}, index=new_index)
df.index = pd.to_datetime(df.index)


df['AC'] = df.columns[0]
df['RC'] = df.columns[1]
df['DC'] = df.columns[2]
df['S'] = df.columns[3]
df['I'] = df.columns[4]
df['R'] = df.columns[5]

fig = px.line(df, x=df.index, y=df.columns[0], color='AC')
fig.add_trace(px.line(df, x=df.index, y=df.columns[1], color='RC', color_discrete_sequence=['green']).data[0])
fig.add_trace(px.line(df, x=df.index, y=df.columns[2], color='DC', color_discrete_sequence=['red']).data[0])
# Forecast
fig.add_trace(px.line(df, x=df.index, y=df.columns[3], color='S', color_discrete_sequence=['orange']).data[0].update(line={'dash': 'dash'}))
fig.add_trace(px.line(df, x=df.index, y=df.columns[4], color='I', color_discrete_sequence=['blue']).data[0].update(line={'dash': 'dash'}))
fig.add_trace(px.line(df, x=df.index, y=df.columns[5], color='R', color_discrete_sequence=['green']).data[0].update(line={'dash': 'dash'}))

fig.update_layout(title=dict(x=0.5, text="SIR Model"),
                  xaxis=dict(tickmode='array', tickfont_size=12, title="Date", showgrid=False),
                  yaxis=dict(title='Cases'),
                  legend=dict(orientation="v", title="", x=0.02, y=0.1, yanchor="bottom"), 
                  hoverlabel=dict(font_size=20))
fig.show()

Logistic Growth Modeling

In a pandemic, the number of cases is expected to have an exponential growth but eventually, it will reach a certain limit given the efforts of the government / health sectors in preventing the spread of the disease. Considering this, we can imagine an "S" curve or in mathematical terms, it's a logistic curve. Therefore, a good way to model is to fit our data in a logistic curve to get better forecast. Thankfully, there is a Python library that we can use.

Prophet is a forecasting procedure developed and released by Facebook’s Core Data Science team. Aside from the typical linear growth time series modeling, it can also handle logistic growth.

Confirmed Cases

In [111]:
#cases_daily = pd.read_excel("./data/cases_daily.xlsx")
cases_daily = confirmed.diff().reset_index().rename({'index': 'DateRepConf', 182: 'CaseCode'}, axis=1)

data = cases_daily.set_index('DateRepConf')
data.index = pd.to_datetime(data.index) ######################################

idx = pd.date_range(data.index[0].date(), data.index[-1].date())
data.index = pd.DatetimeIndex(data.index) 
data = data.reindex(idx, fill_value=0)

data=data.cumsum()
data = data.reset_index(drop=False)
data.columns = ['Timestep', 'Total Cases']
data['Total Cases'] = data['Total Cases'].fillna(0).astype('int64') ##########

The challenge now is to define the "maximum carrying capacity" as required by the logistic growth function of the Prophet tool. Supposedly, this is a known variable but in our case, we really don't know our ceiling (for the 1st wave at least). To make a logical way to define this, let's derive the capacity based on the smallest mean square error (MSE) from the last 30 days of available data. Meaning, we will compute the best fit given a certain range of capacity or ceiling.

Now, we can proceed with the modeling given the capacity computed above. We will also add our forecast based on this on the next 30 days. To consider the uncertainties, we will be defining an interval width at 90% confidence.

In [115]:
from fbprophet.plot import plot_plotly
import plotly.offline as py
py.init_notebook_mode()

fig = plot_plotly(m, forecast, xlabel="Date", ylabel="Case Count", figsize=(1000, 700))
fig.update_layout(title="PH Forecast (Next 30 days) on Total Confirmed Cases", hoverlabel=dict(font_size=20))
fig.update_traces(line=dict(color='blue'))
fig.show()
In [116]:
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error

metric_df = forecast.set_index('ds')[['yhat']].join(df.set_index('ds').y).reset_index()
metric_df.dropna(inplace=True)

print("------------ Accuracy ------------")
print("R-squared: ", round(r2_score(metric_df.y, metric_df.yhat),4)*100, '%')
print("Mean Squared Error: ", mean_squared_error(metric_df.y, metric_df.yhat))
print("Mean Absolute Error: ", mean_absolute_error(metric_df.y, metric_df.yhat))
------------ Accuracy ------------
R-squared:  99.08 %
Mean Squared Error:  218094.05162581877
Mean Absolute Error:  381.3835650869009

Deaths

In [119]:
# fig = px.line(lalala, x='Capacity', y='Mean Square Error')
# smallest_mse = lalala[lalala['Mean Square Error'] == lalala['Mean Square Error'].min()]['Capacity'].values[0]
# fig.add_annotation(dict(xref="x", yref="y", showarrow=True,
#                             arrowhead=2, xanchor='center', yanchor='bottom',
#                             ax=0, ay=-100,
#                             x=smallest_mse, y=lalala['Mean Square Error'].min(),
#                             text="Smallest MSE : <br> Capacity = " + str(smallest_mse), font_size=18, font_color = 'blue'))
# fig.update_layout(title="MSE (Based on the last 30 days) for Each Capacity")
# fig.show()
In [120]:
# STEP 0: Capacity computed based on MSE
df = data.copy()
df['cap'] = lalala[lalala['Mean Square Error'] == lalala['Mean Square Error'].min()]['Capacity'].values[0]
df.columns = ['ds', 'y', 'cap']
# df.columns = ['ds', 'y']
df

# STEP 1: Model Fitting
m = Prophet(growth="logistic", interval_width=0.90)
m.fit(df)

# STEP 2: Specify the number of days to extend into the future.
future = m.make_future_dataframe(periods=30)
future['cap'] = df['cap'].iloc[0]

# STEP 3: Predict the future
forecast = m.predict(future)
In [121]:
from fbprophet.plot import plot_plotly
import plotly.offline as py
py.init_notebook_mode()

fig = plot_plotly(m, forecast, xlabel="Date", ylabel="Case Count", figsize=(1000, 700))
fig.update_layout(title="PH Forecast (Next 30 days) on Death Cases", hoverlabel=dict(font_size=20))
fig.update_traces(line=dict(color='red'))
fig.show()
------------ Accuracy ------------
R-squared:  99.48 %
Mean Squared Error:  502.5349268997346
Mean Absolute Error:  17.525537455347994

Recoveries

In [123]:
recoveries_daily = pd.read_excel("./data/recoveries_daily.xlsx")

data = recoveries_daily.set_index('DateRepRem')
data.index = pd.to_datetime(data.index) ######################################

# idx = pd.date_range(data.index[0].date(), data.index[-1].date())
idx = pd.date_range(data.index[0].date(), data.index[-1].date())
data.index = pd.DatetimeIndex(data.index) 
data = data.reindex(idx, fill_value=0)

data=data.cumsum()
data = data.reset_index(drop=False)
data.columns = ['Timestep', 'Total Cases']
data['Total Cases'] = data['Total Cases'].fillna(0).astype('int64') ##########
In [125]:
# fig = px.line(lalala, x='Capacity', y='Mean Square Error')
# smallest_mse = lalala[lalala['Mean Square Error'] == lalala['Mean Square Error'].min()]['Capacity'].values[0]
# fig.add_annotation(dict(xref="x", yref="y", showarrow=True,
#                             arrowhead=2, xanchor='center', yanchor='bottom',
#                             ax=0, ay=-100,
#                             x=smallest_mse, y=lalala['Mean Square Error'].min(),
#                             text="Smallest MSE : <br> Capacity = " + str(smallest_mse), font_size=18, font_color = 'blue'))
# fig.update_layout(title="MSE (Based on the last 30 days) for Each Capacity")
# fig.show()
In [126]:
# STEP 0: Capacity computed based on MSE
df = data.copy()
df['cap'] = lalala[lalala['Mean Square Error'] == lalala['Mean Square Error'].min()]['Capacity'].values[0]
df.columns = ['ds', 'y', 'cap']
# df.columns = ['ds', 'y']
df

# STEP 1: Model Fitting
m = Prophet(growth="logistic", interval_width=0.90)
m.fit(df)

# STEP 2: Specify the number of days to extend into the future.
future = m.make_future_dataframe(periods=30)
future['cap'] = df['cap'].iloc[0]

# STEP 3: Predict the future
forecast = m.predict(future)
In [127]:
from fbprophet.plot import plot_plotly
import plotly.offline as py
py.init_notebook_mode()

fig = plot_plotly(m, forecast, xlabel="Date", ylabel="Case Count", figsize=(1000, 700))
fig.update_layout(title="PH Forecast (Next 30 days) on Recovered Cases", hoverlabel=dict(font_size=20))
fig.update_traces(line=dict(color='green'))
fig.show()
------------ Accuracy ------------
R-squared:  99.8 %
Mean Squared Error:  1925.7136993775998
Mean Absolute Error:  30.751504278869735